diff options
Diffstat (limited to 'lib')
300 files changed, 45975 insertions, 1337 deletions
diff --git a/lib/addns/dnsgss.c b/lib/addns/dnsgss.c index c9037417da..19b734a6a3 100644 --- a/lib/addns/dnsgss.c +++ b/lib/addns/dnsgss.c @@ -92,7 +92,7 @@ static DNS_ERROR dns_negotiate_gss_ctx_int( TALLOC_CTX *mem_ctx, DNS_ERROR err; gss_OID_desc krb5_oid_desc = - { 9, (char *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }; + { 9, (const char *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" }; *ctx = GSS_C_NO_CONTEXT; input_ptr = NULL; @@ -222,7 +222,7 @@ DNS_ERROR dns_negotiate_sec_ctx( const char *target_realm, gss_name_t targ_name; gss_OID_desc nt_host_oid_desc = - {10, (char *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02\x01"}; + {10, (const char *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02\x01"}; TALLOC_CTX *mem_ctx; diff --git a/lib/addns/dnsmarshall.c b/lib/addns/dnsmarshall.c index 5530290c57..59d6470f34 100644 --- a/lib/addns/dnsmarshall.c +++ b/lib/addns/dnsmarshall.c @@ -39,7 +39,7 @@ struct dns_buffer *dns_create_buffer(TALLOC_CTX *mem_ctx) */ result->size = 2; - if (!(result->data = TALLOC_ARRAY(result, uint8, result->size))) { + if (!(result->data = talloc_array(result, uint8, result->size))) { TALLOC_FREE(result); return NULL; } @@ -78,7 +78,7 @@ void dns_marshall_buffer(struct dns_buffer *buf, const uint8 *data, new_size += (64 - (new_size % 64)); - if (!(new_data = TALLOC_REALLOC_ARRAY(buf, buf->data, uint8, + if (!(new_data = talloc_realloc(buf, buf->data, uint8, new_size))) { buf->error = ERROR_DNS_NO_MEMORY; return; @@ -223,7 +223,7 @@ static void dns_unmarshall_label(TALLOC_CTX *mem_ctx, label->len = len; - if (!(label->label = TALLOC_ARRAY(label, char, len+1))) { + if (!(label->label = talloc_array(label, char, len+1))) { buf->error = ERROR_DNS_NO_MEMORY; goto error; } @@ -329,7 +329,7 @@ static void dns_unmarshall_rr(TALLOC_CTX *mem_ctx, if (!(ERR_DNS_IS_OK(buf->error))) return; if (r->data_length != 0) { - if (!(r->data = TALLOC_ARRAY(r, uint8, r->data_length))) { + if (!(r->data = talloc_array(r, uint8, r->data_length))) { buf->error = ERROR_DNS_NO_MEMORY; 
return; } @@ -390,7 +390,7 @@ DNS_ERROR dns_unmarshall_request(TALLOC_CTX *mem_ctx, uint16 i; DNS_ERROR err; - if (!(req = TALLOC_ZERO_P(mem_ctx, struct dns_request))) { + if (!(req = talloc_zero(mem_ctx, struct dns_request))) { return ERROR_DNS_NO_MEMORY; } @@ -406,22 +406,22 @@ DNS_ERROR dns_unmarshall_request(TALLOC_CTX *mem_ctx, err = ERROR_DNS_NO_MEMORY; if ((req->num_questions != 0) && - !(req->questions = TALLOC_ARRAY(req, struct dns_question *, + !(req->questions = talloc_array(req, struct dns_question *, req->num_questions))) { goto error; } if ((req->num_answers != 0) && - !(req->answers = TALLOC_ARRAY(req, struct dns_rrec *, + !(req->answers = talloc_array(req, struct dns_rrec *, req->num_answers))) { goto error; } if ((req->num_auths != 0) && - !(req->auths = TALLOC_ARRAY(req, struct dns_rrec *, + !(req->auths = talloc_array(req, struct dns_rrec *, req->num_auths))) { goto error; } if ((req->num_additionals != 0) && - !(req->additionals = TALLOC_ARRAY(req, struct dns_rrec *, + !(req->additionals = talloc_array(req, struct dns_rrec *, req->num_additionals))) { goto error; } diff --git a/lib/addns/dnsrecord.c b/lib/addns/dnsrecord.c index 559c2644d4..2240d08fb9 100644 --- a/lib/addns/dnsrecord.c +++ b/lib/addns/dnsrecord.c @@ -31,8 +31,8 @@ DNS_ERROR dns_create_query( TALLOC_CTX *mem_ctx, const char *name, struct dns_question *q; DNS_ERROR err; - if (!(req = TALLOC_ZERO_P(mem_ctx, struct dns_request)) || - !(req->questions = TALLOC_ARRAY(req, struct dns_question *, 1)) || + if (!(req = talloc_zero(mem_ctx, struct dns_request)) || + !(req->questions = talloc_array(req, struct dns_question *, 1)) || !(req->questions[0] = talloc(req->questions, struct dns_question))) { TALLOC_FREE(req); @@ -64,8 +64,8 @@ DNS_ERROR dns_create_update( TALLOC_CTX *mem_ctx, const char *name, struct dns_zone *z; DNS_ERROR err; - if (!(req = TALLOC_ZERO_P(mem_ctx, struct dns_update_request)) || - !(req->zones = TALLOC_ARRAY(req, struct dns_zone *, 1)) || + if (!(req = 
talloc_zero(mem_ctx, struct dns_update_request)) || + !(req->zones = talloc_array(req, struct dns_zone *, 1)) || !(req->zones[0] = talloc(req->zones, struct dns_zone))) { TALLOC_FREE(req); return ERROR_DNS_NO_MEMORY; @@ -131,8 +131,8 @@ DNS_ERROR dns_create_a_record(TALLOC_CTX *mem_ctx, const char *host, return ERROR_DNS_SUCCESS; } - ip = ((struct sockaddr_in *)pss)->sin_addr; - if (!(data = (uint8 *)TALLOC_MEMDUP(mem_ctx, (const void *)&ip.s_addr, + ip = ((const struct sockaddr_in *)pss)->sin_addr; + if (!(data = (uint8 *)talloc_memdup(mem_ctx, (const void *)&ip.s_addr, sizeof(ip.s_addr)))) { return ERROR_DNS_NO_MEMORY; } @@ -240,7 +240,7 @@ DNS_ERROR dns_unmarshall_tkey_record(TALLOC_CTX *mem_ctx, struct dns_rrec *rec, if (!ERR_DNS_IS_OK(buf.error)) goto error; if (tkey->key_length) { - if (!(tkey->key = TALLOC_ARRAY(tkey, uint8, tkey->key_length))) { + if (!(tkey->key = talloc_array(tkey, uint8, tkey->key_length))) { buf.error = ERROR_DNS_NO_MEMORY; goto error; } @@ -308,7 +308,7 @@ DNS_ERROR dns_add_rrec(TALLOC_CTX *mem_ctx, struct dns_rrec *rec, { struct dns_rrec **new_records; - if (!(new_records = TALLOC_REALLOC_ARRAY(mem_ctx, *records, + if (!(new_records = talloc_realloc(mem_ctx, *records, struct dns_rrec *, (*num_records)+1))) { return ERROR_DNS_NO_MEMORY; diff --git a/lib/addns/dnssock.c b/lib/addns/dnssock.c index 42b4e2d40f..aaeb3f03fa 100644 --- a/lib/addns/dnssock.c +++ b/lib/addns/dnssock.c @@ -250,7 +250,7 @@ static DNS_ERROR dns_receive_tcp(TALLOC_CTX *mem_ctx, DNS_ERROR err; uint16 len; - if (!(buf = TALLOC_ZERO_P(mem_ctx, struct dns_buffer))) { + if (!(buf = talloc_zero(mem_ctx, struct dns_buffer))) { return ERROR_DNS_NO_MEMORY; } @@ -262,7 +262,7 @@ static DNS_ERROR dns_receive_tcp(TALLOC_CTX *mem_ctx, buf->size = ntohs(len); if (buf->size) { - if (!(buf->data = TALLOC_ARRAY(buf, uint8, buf->size))) { + if (!(buf->data = talloc_array(buf, uint8, buf->size))) { TALLOC_FREE(buf); return ERROR_DNS_NO_MEMORY; } @@ -287,7 +287,7 @@ static DNS_ERROR 
dns_receive_udp(TALLOC_CTX *mem_ctx, struct dns_buffer *buf; ssize_t received; - if (!(buf = TALLOC_ZERO_P(mem_ctx, struct dns_buffer))) { + if (!(buf = talloc_zero(mem_ctx, struct dns_buffer))) { return ERROR_DNS_NO_MEMORY; } @@ -295,7 +295,7 @@ static DNS_ERROR dns_receive_udp(TALLOC_CTX *mem_ctx, * UDP based DNS can only be 512 bytes */ - if (!(buf->data = TALLOC_ARRAY(buf, uint8, 512))) { + if (!(buf->data = talloc_array(buf, uint8, 512))) { TALLOC_FREE(buf); return ERROR_DNS_NO_MEMORY; } diff --git a/lib/addns/dnsutils.c b/lib/addns/dnsutils.c index 37b862c7f0..43305a9873 100644 --- a/lib/addns/dnsutils.c +++ b/lib/addns/dnsutils.c @@ -53,7 +53,7 @@ static DNS_ERROR LabelList( TALLOC_CTX *mem_ctx, return ERROR_DNS_INVALID_NAME; } - if (!(result = TALLOC_ZERO_P(mem_ctx, struct dns_domain_label))) { + if (!(result = talloc_zero(mem_ctx, struct dns_domain_label))) { return ERROR_DNS_NO_MEMORY; } @@ -138,7 +138,7 @@ char *dns_generate_keyname( TALLOC_CTX *mem_ctx ) /* * uuid_unparse gives 36 bytes plus '\0' */ - if (!(result = TALLOC_ARRAY(mem_ctx, char, 37))) { + if (!(result = talloc_array(mem_ctx, char, 37))) { return NULL; } diff --git a/lib/async_req/async_sock.c b/lib/async_req/async_sock.c index 86053d94e8..dfb1a1cdbd 100644 --- a/lib/async_req/async_sock.c +++ b/lib/async_req/async_sock.c @@ -386,6 +386,7 @@ struct writev_state { int count; size_t total_size; uint16_t flags; + bool err_on_readability; }; static void writev_trigger(struct tevent_req *req, void *private_data); @@ -413,10 +414,8 @@ struct tevent_req *writev_send(TALLOC_CTX *mem_ctx, struct tevent_context *ev, if (state->iov == NULL) { goto fail; } - state->flags = TEVENT_FD_WRITE; - if (err_on_readability) { - state->flags |= TEVENT_FD_READ; - } + state->flags = TEVENT_FD_WRITE|TEVENT_FD_READ; + state->err_on_readability = err_on_readability; if (queue == NULL) { struct tevent_fd *fde; @@ -462,8 +461,35 @@ static void writev_handler(struct tevent_context *ev, struct tevent_fd *fde, to_write = 
0; if ((state->flags & TEVENT_FD_READ) && (flags & TEVENT_FD_READ)) { - tevent_req_error(req, EPIPE); - return; + int ret, value; + + if (state->err_on_readability) { + /* Readable and the caller wants an error on read. */ + tevent_req_error(req, EPIPE); + return; + } + + /* Might be an error. Check if there are bytes to read */ + ret = ioctl(state->fd, FIONREAD, &value); + /* FIXME - should we also check + for ret == 0 and value == 0 here ? */ + if (ret == -1) { + /* There's an error. */ + tevent_req_error(req, EPIPE); + return; + } + /* A request for TEVENT_FD_READ will succeed from now and + forevermore until the bytes are read so if there was + an error we'll wait until we do read, then get it in + the read callback function. Until then, remove TEVENT_FD_READ + from the flags we're waiting for. */ + state->flags &= ~TEVENT_FD_READ; + TEVENT_FD_NOT_READABLE(fde); + + /* If not writable, we're done. */ + if (!(flags & TEVENT_FD_WRITE)) { + return; + } } for (i=0; i<state->count; i++) { diff --git a/lib/ccan/array_size/LICENSE b/lib/ccan/array_size/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/array_size/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/lib/ccan/array_size/_info b/lib/ccan/array_size/_info new file mode 100644 index 0000000000..af7ef1cfd2 --- /dev/null +++ b/lib/ccan/array_size/_info @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * array_size - routine for safely deriving the size of a visible array. 
+ * + * This provides a simple ARRAY_SIZE() macro, which (given a good compiler) + * will also break compile if you try to use it on a pointer. + * + * This can ensure your code is robust to changes, without needing a gratuitous + * macro or constant. + * + * Example: + * // Outputs "Initialized 32 values" + * #include <ccan/array_size/array_size.h> + * #include <stdlib.h> + * #include <stdio.h> + * + * // We currently use 32 random values. + * static unsigned int vals[32]; + * + * int main(void) + * { + * unsigned int i; + * for (i = 0; i < ARRAY_SIZE(vals); i++) + * vals[i] = random(); + * printf("Initialized %u values\n", i); + * return 0; + * } + * + * License: LGPL (2 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/array_size/array_size.h b/lib/ccan/array_size/array_size.h new file mode 100644 index 0000000000..0876945c5e --- /dev/null +++ b/lib/ccan/array_size/array_size.h @@ -0,0 +1,25 @@ +#ifndef CCAN_ARRAY_SIZE_H +#define CCAN_ARRAY_SIZE_H +#include "config.h" +#include <ccan/build_assert/build_assert.h> + +/** + * ARRAY_SIZE - get the number of elements in a visible array + * @arr: the array whose size you want. + * + * This does not work on pointers, or arrays declared as [], or + * function parameters. With correct compiler support, such usage + * will cause a build error (see build_assert); otherwise _array_size_chk() is 0 and misuse goes undetected. + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + _array_size_chk(arr)) + +#if HAVE_BUILTIN_TYPES_COMPATIBLE_P && HAVE_TYPEOF +/* Two gcc extensions.
+ * &a[0] degrades to a pointer: a different type from an array */ +#define _array_size_chk(arr) \ + BUILD_ASSERT_OR_ZERO(!__builtin_types_compatible_p(typeof(arr), \ + typeof(&(arr)[0]))) +#else +#define _array_size_chk(arr) 0 +#endif +#endif /* CCAN_ARRAY_SIZE_H */ diff --git a/lib/ccan/array_size/test/compile_fail-function-param.c b/lib/ccan/array_size/test/compile_fail-function-param.c new file mode 100644 index 0000000000..cb64d98424 --- /dev/null +++ b/lib/ccan/array_size/test/compile_fail-function-param.c @@ -0,0 +1,24 @@ +#include <ccan/array_size/array_size.h> +#include <stdlib.h> + +struct foo { + unsigned int a, b; +}; + +int check_parameter(const struct foo array[4]); +int check_parameter(const struct foo array[4]) +{ +#ifdef FAIL + return (ARRAY_SIZE(array) == 4); +#if !HAVE_TYPEOF || !HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if _array_size_chk is a noop." +#endif +#else + return sizeof(array) == 4 * sizeof(struct foo); +#endif +} + +int main(int argc, char *argv[]) +{ + return check_parameter(NULL); +} diff --git a/lib/ccan/array_size/test/compile_fail.c b/lib/ccan/array_size/test/compile_fail.c new file mode 100644 index 0000000000..37d315f219 --- /dev/null +++ b/lib/ccan/array_size/test/compile_fail.c @@ -0,0 +1,14 @@ +#include <ccan/array_size/array_size.h> + +int main(int argc, char *argv[8]) +{ + char array[100]; +#ifdef FAIL + return ARRAY_SIZE(argv) + ARRAY_SIZE(array); +#if !HAVE_TYPEOF || !HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if _array_size_chk is a noop."
+#endif +#else + return ARRAY_SIZE(array); +#endif +} diff --git a/lib/ccan/array_size/test/run.c b/lib/ccan/array_size/test/run.c new file mode 100644 index 0000000000..37b4200b44 --- /dev/null +++ b/lib/ccan/array_size/test/run.c @@ -0,0 +1,33 @@ +#include <ccan/array_size/array_size.h> +#include <ccan/tap/tap.h> + +static char array1[1]; +static int array2[2]; +static unsigned long array3[3][5]; +struct foo { + unsigned int a, b; + char string[100]; +}; +static struct foo array4[4]; + +/* Make sure ARRAY_SIZE() results can be used in static initializers. */ +static int array1_size = ARRAY_SIZE(array1); +static int array2_size = ARRAY_SIZE(array2); +static int array3_size = ARRAY_SIZE(array3); +static int array4_size = ARRAY_SIZE(array4); + +int main(int argc, char *argv[]) +{ + plan_tests(8); + ok1(array1_size == 1); + ok1(array2_size == 2); + ok1(array3_size == 3); + ok1(array4_size == 4); + + ok1(ARRAY_SIZE(array1) == 1); + ok1(ARRAY_SIZE(array2) == 2); + ok1(ARRAY_SIZE(array3) == 3); + ok1(ARRAY_SIZE(array4) == 4); + + return exit_status(); +} diff --git a/lib/ccan/asearch/LICENSE b/lib/ccan/asearch/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/asearch/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users.
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
diff --git a/lib/ccan/asearch/_info b/lib/ccan/asearch/_info new file mode 100644 index 0000000000..857475016f --- /dev/null +++ b/lib/ccan/asearch/_info @@ -0,0 +1,58 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * asearch - typesafe binary search (bsearch) + * + * An ordered array of objects can be efficiently searched using a binary + * search algorithm; the time taken is around log(number of elements). + * + * This version uses macros to be typesafe on platforms which support it. + * + * License: LGPL + * Author: Rusty Russell <rusty@rustcorp.com.au> + * + * Example: + * #include <ccan/asearch/asearch.h> + * #include <stdio.h> + * #include <string.h> + * + * static int cmp(const char *key, char *const *elem) + * { + * return strcmp(key, *elem); + * } + * + * int main(int argc, char *argv[]) + * { + * char **p; + * + * if (argc < 2) { + * fprintf(stderr, "Usage: %s <key> <list>...\n" + * "Print position of key in (sorted) list\n", + * argv[0]); + * exit(1); + * } + * + * p = asearch(argv[1], &argv[2], argc-2, cmp); + * if (!p) { + * printf("Not found!\n"); + * return 1; + * } + * printf("%u\n", (unsigned)(p - &argv[2])); + * return 0; + * } + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/typesafe_cb\n"); + printf("ccan/array_size\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/asearch/asearch.h b/lib/ccan/asearch/asearch.h new file mode 100644 index 0000000000..d252284e7d --- /dev/null +++ b/lib/ccan/asearch/asearch.h @@ -0,0 +1,37 @@ +#ifndef CCAN_ASEARCH_H +#define CCAN_ASEARCH_H +#include <stdlib.h> +#include <ccan/typesafe_cb/typesafe_cb.h> + +/** + * asearch - search an array of elements + * @key: pointer to item being searched for + * @base: pointer to sorted data to search + * @num: number of elements + * @cmp: pointer to comparison function + * + * This function does a binary search on the given array.
The + * contents of the array should already be in ascending sorted order + * under the provided comparison function. + * + * Note that the key need not have the same type as the elements in + * the array, e.g. key could be a string and the comparison function + * could compare the string with the struct's name field. However, if + * the key and elements in the array are of the same type, you can use + * the same comparison function for both sort() and asearch(). + */ +#if HAVE_TYPEOF +#define asearch(key, base, num, cmp) \ + ((__typeof__(*(base))*)(bsearch((key), (base), (num), sizeof(*(base)), \ + typesafe_cb_cast(int (*)(const void *, const void *), \ + int (*)(const __typeof__(*(key)) *, \ + const __typeof__(*(base)) *), \ + (cmp))))) + +#else +#define asearch(key, base, num, cmp) \ + (bsearch((key), (base), (num), sizeof(*(base)), \ + (int (*)(const void *, const void *))(cmp))) +#endif + +#endif /* CCAN_ASEARCH_H */ diff --git a/lib/ccan/asearch/test/compile_fail-return-value-const.c b/lib/ccan/asearch/test/compile_fail-return-value-const.c new file mode 100644 index 0000000000..2edee93501 --- /dev/null +++ b/lib/ccan/asearch/test/compile_fail-return-value-const.c @@ -0,0 +1,25 @@ +#include <ccan/asearch/asearch.h> +#include <ccan/array_size/array_size.h> +#include <string.h> + +static int cmp(const char *key, const char *const *elem) +{ + return strcmp(key, *elem); +} + +int main(void) +{ + const char key[] = "key"; + const char *elems[] = { "a", "big", "list", "of", "things" }; + +#ifdef FAIL + char **p; +#if !HAVE_TYPEOF +#error "Unfortunately we don't fail if no typeof." +#endif +#else + const char **p; +#endif + p = asearch(key, elems, ARRAY_SIZE(elems), cmp); + return p ? 
0 : 1; +} diff --git a/lib/ccan/asearch/test/compile_fail-return-value.c b/lib/ccan/asearch/test/compile_fail-return-value.c new file mode 100644 index 0000000000..4aef5327a8 --- /dev/null +++ b/lib/ccan/asearch/test/compile_fail-return-value.c @@ -0,0 +1,22 @@ +#include <ccan/asearch/asearch.h> + +static int cmp(const char *key, char *const *elem) +{ + return 0; +} + +int main(int argc, char **argv) +{ + const char key[] = "key"; + +#ifdef FAIL + int **p; +#if !HAVE_TYPEOF +#error "Unfortunately we don't fail if no typeof." +#endif +#else + char **p; +#endif + p = asearch(key, argv+1, argc-1, cmp); + return p ? 0 : 1; +} diff --git a/lib/ccan/asearch/test/run-strings.c b/lib/ccan/asearch/test/run-strings.c new file mode 100644 index 0000000000..3ec453842f --- /dev/null +++ b/lib/ccan/asearch/test/run-strings.c @@ -0,0 +1,22 @@ +#include <ccan/asearch/asearch.h> +#include <ccan/array_size/array_size.h> +#include <ccan/tap/tap.h> +#include <stdlib.h> + +static int cmp(const int *key, const char *const *elem) +{ + return *key - atoi(*elem); +} + +int main(void) +{ + const char *args[] = { "1", "4", "7", "9" }; + int key = 7; + const char **p; + + plan_tests(1); + p = asearch(&key, args, ARRAY_SIZE(args), cmp); + ok1(p == &args[2]); + + return exit_status(); +} diff --git a/lib/ccan/asearch/test/run.c b/lib/ccan/asearch/test/run.c new file mode 100644 index 0000000000..2a896fccfe --- /dev/null +++ b/lib/ccan/asearch/test/run.c @@ -0,0 +1,40 @@ +#include <ccan/asearch/asearch.h> +#include <ccan/array_size/array_size.h> +#include <ccan/tap/tap.h> +#include <limits.h> + +static int test_cmp(const int *key, const int *elt) +{ + if (*key < *elt) + return -1; + else if (*key > *elt) + return 1; + return 0; +} + +int main(void) +{ + const int arr[] = { INT_MIN, 0, 1, 2, 3, 4, 5, 6, INT_MAX }; + unsigned int start, num, i, total = 0; + int key; + + plan_tests(285); + + for (start = 0; start < ARRAY_SIZE(arr); start++) { + for (num = 0; num < ARRAY_SIZE(arr) - start; num++) { 
+ key = 7; + ok1(asearch(&key, &arr[start], num, test_cmp) == NULL); + total++; + for (i = start; i < start+num; i++) { + const int *ret; + key = arr[i]; + ret = asearch(&key, &arr[start], num, test_cmp); + ok1(ret); + ok1(ret && *ret == key); + total++; + } + } + } + diag("Tested %u searches\n", total); + return exit_status(); +} diff --git a/lib/ccan/build_assert/LICENSE b/lib/ccan/build_assert/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/build_assert/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/lib/ccan/build_assert/_info b/lib/ccan/build_assert/_info new file mode 100644 index 0000000000..0906af07e6 --- /dev/null +++ b/lib/ccan/build_assert/_info @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * build_assert - routines for build-time assertions + * + * This code provides routines which will cause compilation to fail should some + * assertion be untrue: such failures are preferable to run-time assertions, + * but much more limited since they can only depend on compile-time constants. + * + * These assertions are most useful when two parts of the code must be kept in + * sync: it is better to avoid such cases if possible, but second best is to + * detect invalid changes at build time. + * + * For example, a tricky piece of code might rely on a certain element being at + * the start of the structure.
To ensure that future changes don't break it, + * you would catch such changes in your code like so: + * + * Example: + * #include <stddef.h> + * #include <ccan/build_assert/build_assert.h> + * + * struct foo { + * char string[5]; + * int x; + * }; + * + * static char *foo_string(struct foo *foo) + * { + * // This trick requires that the string be first in the structure + * BUILD_ASSERT(offsetof(struct foo, string) == 0); + * return (char *)foo; + * } + * + * License: LGPL (2 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) + /* Nothing. */ + return 0; + + return 1; +} diff --git a/lib/ccan/build_assert/build_assert.h b/lib/ccan/build_assert/build_assert.h new file mode 100644 index 0000000000..24e59c44cd --- /dev/null +++ b/lib/ccan/build_assert/build_assert.h @@ -0,0 +1,39 @@ +#ifndef CCAN_BUILD_ASSERT_H +#define CCAN_BUILD_ASSERT_H + +/** + * BUILD_ASSERT - assert a build-time dependency. + * @cond: the compile-time condition which must be true. + * + * Your compile will fail if the condition isn't true, or can't be evaluated + * by the compiler. This can only be used within a function. + * + * Example: + * #include <stddef.h> + * ... + * static char *foo_to_char(struct foo *foo) + * { + * // This code needs string to be at start of foo. + * BUILD_ASSERT(offsetof(struct foo, string) == 0); + * return (char *)foo; + * } + */ +#define BUILD_ASSERT(cond) \ + do { (void) sizeof(char [1 - 2*!(cond)]); } while(0) + +/** + * BUILD_ASSERT_OR_ZERO - assert a build-time dependency, as an expression. + * @cond: the compile-time condition which must be true. + * + * Your compile will fail if the condition isn't true, or can't be evaluated + * by the compiler. This can be used in an expression: its value is "0". 
+ * + * Example: + * #define foo_to_char(foo) \ + * ((char *)(foo) \ + * + BUILD_ASSERT_OR_ZERO(offsetof(struct foo, string) == 0)) + */ +#define BUILD_ASSERT_OR_ZERO(cond) \ + (sizeof(char [1 - 2*!(cond)]) - 1) + +#endif /* CCAN_BUILD_ASSERT_H */ diff --git a/lib/ccan/build_assert/test/compile_fail-expr.c b/lib/ccan/build_assert/test/compile_fail-expr.c new file mode 100644 index 0000000000..109215b8aa --- /dev/null +++ b/lib/ccan/build_assert/test/compile_fail-expr.c @@ -0,0 +1,10 @@ +#include <ccan/build_assert/build_assert.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + return BUILD_ASSERT_OR_ZERO(1 == 0); +#else + return 0; +#endif +} diff --git a/lib/ccan/build_assert/test/compile_fail.c b/lib/ccan/build_assert/test/compile_fail.c new file mode 100644 index 0000000000..37d95eddc9 --- /dev/null +++ b/lib/ccan/build_assert/test/compile_fail.c @@ -0,0 +1,9 @@ +#include <ccan/build_assert/build_assert.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + BUILD_ASSERT(1 == 0); +#endif + return 0; +} diff --git a/lib/ccan/build_assert/test/compile_ok.c b/lib/ccan/build_assert/test/compile_ok.c new file mode 100644 index 0000000000..4105484d1f --- /dev/null +++ b/lib/ccan/build_assert/test/compile_ok.c @@ -0,0 +1,7 @@ +#include <ccan/build_assert/build_assert.h> + +int main(int argc, char *argv[]) +{ + BUILD_ASSERT(1 == 1); + return 0; +} diff --git a/lib/ccan/build_assert/test/run-BUILD_ASSERT_OR_ZERO.c b/lib/ccan/build_assert/test/run-BUILD_ASSERT_OR_ZERO.c new file mode 100644 index 0000000000..4185821331 --- /dev/null +++ b/lib/ccan/build_assert/test/run-BUILD_ASSERT_OR_ZERO.c @@ -0,0 +1,9 @@ +#include <ccan/build_assert/build_assert.h> +#include <ccan/tap/tap.h> + +int main(int argc, char *argv[]) +{ + plan_tests(1); + ok1(BUILD_ASSERT_OR_ZERO(1 == 1) == 0); + return exit_status(); +} diff --git a/lib/ccan/build_assert/test/run-EXPR_BUILD_ASSERT.c b/lib/ccan/build_assert/test/run-EXPR_BUILD_ASSERT.c new file mode 100644 index 0000000000..91bbbbbf75 
--- /dev/null +++ b/lib/ccan/build_assert/test/run-EXPR_BUILD_ASSERT.c @@ -0,0 +1,9 @@ +#include <ccan/build_assert/build_assert.h> +#include <ccan/tap/tap.h> + +int main(int argc, char *argv[]) +{ + plan_tests(1); + ok1(BUILD_ASSERT_OR_ZERO(1 == 1) == 0); /* build_assert.h defines BUILD_ASSERT_OR_ZERO, not EXPR_BUILD_ASSERT; a true condition yields 0 */ + return exit_status(); +} diff --git a/lib/ccan/cast/LICENSE b/lib/ccan/cast/LICENSE new file mode 100644 index 0000000000..cca7fc278f --- /dev/null +++ b/lib/ccan/cast/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version.
+ + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/lib/ccan/cast/_info b/lib/ccan/cast/_info new file mode 100644 index 0000000000..5f82a05b8f --- /dev/null +++ b/lib/ccan/cast/_info @@ -0,0 +1,85 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * cast - routines for safer casting. + * + * Often you want to cast in a limited way, such as removing a const or + * switching between integer types. However, normal casts will work on + * almost any type, making them dangerous when the code changes. + * + * These C++-inspired macros serve two purposes: they make it clear the + * exact reason for the cast, and they also (with some compilers) cause + * errors when misused. + * + * Based on Jan Engelhardt's libHX macros: http://libhx.sourceforge.net/ + * + * Author: Jan Engelhardt + * Maintainer: Rusty Russell <rusty@rustcorp.com.au> + * License: LGPL + * + * Example: + * // Given "test" contains "3 t's in 'test string' + * #include <ccan/cast/cast.h> + * #include <stdint.h> + * #include <stdio.h> + * + * // Find char @orig in @str, if @repl, replace them. Return number. + * static size_t find_chars(char *str, char orig, char repl) + * { + * size_t i, count = 0; + * for (i = 0; str[i]; i++) { + * if (str[i] == orig) { + * count++; + * if (repl) + * str[i] = repl; + * } + * } + * return count; + * } + * + * // Terrible hash function. + * static uint64_t hash_string(const unsigned char *str) + * { + * size_t i; + * uint64_t hash = 0; + * for (i = 0; str[i]; i++) + * hash += str[i]; + * return hash; + * } + * + * int main(int argc, char *argv[]) + * { + * uint64_t hash; + * + * // find_chars wants a non-const string, but doesn't + * // need it if repl == 0.
+ * printf("%zu %c's in 'test string'\n", + * find_chars(cast_const(char *, "test string"), + * argv[1][0], 0), + * argv[1][0]); + * + * // hash_string wants an unsigned char. + * hash = hash_string(cast_signed(unsigned char *, argv[1])); + * + * // Need a long long to hand to printf. + * printf("Hash of '%s' = %llu\n", argv[1], + * cast_static(unsigned long long, hash)); + * return 0; + * } + * + */ +int main(int argc, char *argv[]) +{ + /* Expect exactly one argument */ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/cast/cast.h b/lib/ccan/cast/cast.h new file mode 100644 index 0000000000..daebd85723 --- /dev/null +++ b/lib/ccan/cast/cast.h @@ -0,0 +1,129 @@ +#ifndef CCAN_CAST_H +#define CCAN_CAST_H +#include "config.h" +#include <stdint.h> +#include <ccan/build_assert/build_assert.h> + +/** + * cast_signed - cast a (const) char * to/from (const) signed/unsigned char *. + * @type: some char * variant. + * @expr: expression (of some char * variant) to cast. + * + * Some libraries insist on an unsigned char in various places; cast_signed + * makes sure (with suitable compiler) that the expression you are casting + * only differs in signed/unsigned, not in type or const-ness. + */ +#define cast_signed(type, expr) \ + ((type)(expr) \ + + BUILD_ASSERT_OR_ZERO(cast_sign_compatible(type, (expr)))) + +/** + * cast_const - remove a const qualifier from a pointer. + * @type: some pointer type. + * @expr: expression to cast. + * + * This ensures that you are only removing the const qualifier from an + * expression. The expression must otherwise match @type. + * + * If @type is a pointer to a pointer, you must use cast_const2 (etc). + * + * Example: + * // Dumb open-coded strstr variant. 
+ * static char *find_needle(const char *haystack) + * { + * size_t i; + * for (i = 0; i < strlen(haystack); i++) + * if (memcmp("needle", haystack+i, strlen("needle")) == 0) + * return cast_const(char *, haystack+i); + * return NULL; + * } + */ +#define cast_const(type, expr) \ + ((type)((intptr_t)(expr) \ + + BUILD_ASSERT_OR_ZERO(cast_const_compat1((expr), type)))) + +/** + * cast_const2 - remove a const qualifier from a pointer to a pointer. + * @type: some pointer to pointer type. + * @expr: expression to cast. + * + * This ensures that you are only removing the const qualifier from an + * expression. The expression must otherwise match @type. + */ +#define cast_const2(type, expr) \ + ((type)((intptr_t)(expr) \ + + BUILD_ASSERT_OR_ZERO(cast_const_compat2((expr), type)))) + +/** + * cast_const3 - remove a const from a pointer to a pointer to a pointer.. + * @type: some pointer to pointer to pointer type. + * @expr: expression to cast. + * + * This ensures that you are only removing the const qualifier from an + * expression. The expression must otherwise match @type. + */ +#define cast_const3(type, expr) \ + ((type)((intptr_t)(expr) \ + + BUILD_ASSERT_OR_ZERO(cast_const_compat3((expr), type)))) + + +/** + * cast_static - explicit mimic of implicit cast. + * @type: some type. + * @expr: expression to cast. + * + * This ensures that the cast is not to or from a pointer: it can only be + * an implicit cast, such as a pointer to a similar const pointer, or between + * integral types. + */ +#if HAVE_COMPOUND_LITERALS +#define cast_static(type, expr) \ + ((struct { type x; }){(expr)}.x) +#else +#define cast_static(type, expr) \ + ((type)(expr)) +#endif + +/* Herein lies the gcc magic to evoke compile errors. 
*/ +#if HAVE_BUILTIN_CHOOSE_EXPR && HAVE_BUILTIN_TYPES_COMPATIBLE_P && HAVE_TYPEOF +#define cast_sign_compatible(t, e) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(__typeof__(t), char *) || \ + __builtin_types_compatible_p(__typeof__(t), signed char *) || \ + __builtin_types_compatible_p(__typeof__(t), unsigned char *), \ + /* if type is not const qualified */ \ + __builtin_types_compatible_p(__typeof__(e), char *) || \ + __builtin_types_compatible_p(__typeof__(e), signed char *) || \ + __builtin_types_compatible_p(__typeof__(e), unsigned char *), \ + /* and if it is... */ \ + __builtin_types_compatible_p(__typeof__(e), const char *) || \ + __builtin_types_compatible_p(__typeof__(e), const signed char *) || \ + __builtin_types_compatible_p(__typeof__(e), const unsigned char *) ||\ + __builtin_types_compatible_p(__typeof__(e), char *) || \ + __builtin_types_compatible_p(__typeof__(e), signed char *) || \ + __builtin_types_compatible_p(__typeof__(e), unsigned char *) \ + ) + +#define cast_const_strip1(expr) \ + __typeof__(*(struct { int z; __typeof__(expr) x; }){0}.x) +#define cast_const_strip2(expr) \ + __typeof__(**(struct { int z; __typeof__(expr) x; }){0}.x) +#define cast_const_strip3(expr) \ + __typeof__(***(struct { int z; __typeof__(expr) x; }){0}.x) +#define cast_const_compat1(expr, type) \ + __builtin_types_compatible_p(cast_const_strip1(expr), \ + cast_const_strip1(type)) +#define cast_const_compat2(expr, type) \ + __builtin_types_compatible_p(cast_const_strip2(expr), \ + cast_const_strip2(type)) +#define cast_const_compat3(expr, type) \ + __builtin_types_compatible_p(cast_const_strip3(expr), \ + cast_const_strip3(type)) +#else +#define cast_sign_compatible(type, expr) \ + (sizeof(*(type)0) == 1 && sizeof(*(expr)) == 1) +#define cast_const_compat1(expr, type) (1) +#define cast_const_compat2(expr, type) (1) +#define cast_const_compat3(expr, type) (1) +#endif +#endif /* CCAN_CAST_H */ diff --git a/lib/ccan/cast/test/compile_fail-cast_const.c 
b/lib/ccan/cast/test/compile_fail-cast_const.c new file mode 100644 index 0000000000..277f3de1c4 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_const.c @@ -0,0 +1,29 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +/* Note: this *isn't* sizeof(char) on all platforms. */ +struct char_struct { + char c; +}; + +int main(int argc, char *argv[]) +{ + char *uc; + const +#ifdef FAIL + struct char_struct +#else + char +#endif + *p = NULL; + + uc = cast_const(char *, p); + (void) uc; /* Suppress unused-but-set-variable warning. */ + return 0; +} + +#ifdef FAIL +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_const can only use size" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_const2.c b/lib/ccan/cast/test/compile_fail-cast_const2.c new file mode 100644 index 0000000000..e671e88eda --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_const2.c @@ -0,0 +1,29 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +/* Note: this *isn't* sizeof(char) on all platforms. */ +struct char_struct { + char c; +}; + +int main(int argc, char *argv[]) +{ + char **uc; + const +#ifdef FAIL + struct char_struct +#else + char +#endif + **p = NULL; + + uc = cast_const2(char **, p); + (void) uc; /* Suppress unused-but-set-variable warning. */ + return 0; +} + +#ifdef FAIL +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_const can only use size" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_const3.c b/lib/ccan/cast/test/compile_fail-cast_const3.c new file mode 100644 index 0000000000..e958e2dde5 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_const3.c @@ -0,0 +1,29 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +/* Note: this *isn't* sizeof(char) on all platforms. 
*/ +struct char_struct { + char c; +}; + +int main(int argc, char *argv[]) +{ + char ***uc; + const +#ifdef FAIL + struct char_struct +#else + char +#endif + ***p = NULL; + + uc = cast_const3(char ***, p); + (void) uc; /* Suppress unused-but-set-variable warning. */ + return 0; +} + +#ifdef FAIL +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_const can only use size" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_signed-const.c b/lib/ccan/cast/test/compile_fail-cast_signed-const.c new file mode 100644 index 0000000000..9971dc8eb3 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_signed-const.c @@ -0,0 +1,22 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + unsigned char *uc; +#ifdef FAIL + const +#endif + char + *p = NULL; + + uc = cast_signed(unsigned char *, p); + (void) uc; /* Suppress unused-but-set-variable warning. */ + return 0; +} + +#ifdef FAIL +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_const can only use size" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_signed-sizesame.c b/lib/ccan/cast/test/compile_fail-cast_signed-sizesame.c new file mode 100644 index 0000000000..2bc40b2f46 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_signed-sizesame.c @@ -0,0 +1,29 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +/* Note: this *isn't* sizeof(char) on all platforms. */ +struct char_struct { + char c; +}; + +int main(int argc, char *argv[]) +{ + unsigned char *uc; +#ifdef FAIL + struct char_struct +#else + char +#endif + *p = NULL; + + uc = cast_signed(unsigned char *, p); + + (void) uc; /* Suppress unused-but-set-variable warning. 
*/ + return 0; +} + +#ifdef FAIL +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_signed can only use size" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_signed.c b/lib/ccan/cast/test/compile_fail-cast_signed.c new file mode 100644 index 0000000000..66bcc0a1b5 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_signed.c @@ -0,0 +1,17 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + unsigned char *uc; +#ifdef FAIL + int +#else + char +#endif + *p = NULL; + + uc = cast_signed(unsigned char *, p); + (void) uc; /* Suppress unused-but-set-variable warning. */ + return 0; +} diff --git a/lib/ccan/cast/test/compile_fail-cast_static-2.c b/lib/ccan/cast/test/compile_fail-cast_static-2.c new file mode 100644 index 0000000000..8a12025384 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_static-2.c @@ -0,0 +1,23 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + char *c; +#ifdef FAIL + long +#else + char +#endif + *p = 0; + + c = cast_static(char *, p); + (void) c; /* Suppress unused-but-set-variable warning. */ + return 0; +} + +#ifdef FAIL +#if !HAVE_COMPOUND_LITERALS +#error "Unfortunately we don't fail if cast_static is a noop" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_static-3.c b/lib/ccan/cast/test/compile_fail-cast_static-3.c new file mode 100644 index 0000000000..6296b75276 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_static-3.c @@ -0,0 +1,21 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + char *c; +#ifdef FAIL + const +#endif + char *p = 0; + + c = cast_static(char *, p); + (void) c; /* Suppress unused-but-set-variable warning. 
*/ + return 0; +} + +#ifdef FAIL +#if !HAVE_COMPOUND_LITERALS +#error "Unfortunately we don't fail if cast_static is a noop" +#endif +#endif diff --git a/lib/ccan/cast/test/compile_fail-cast_static.c b/lib/ccan/cast/test/compile_fail-cast_static.c new file mode 100644 index 0000000000..0f9e478047 --- /dev/null +++ b/lib/ccan/cast/test/compile_fail-cast_static.c @@ -0,0 +1,17 @@ +#include <ccan/cast/cast.h> +#include <stdlib.h> + +int main(int argc, char *argv[]) +{ + char c; +#ifdef FAIL + char * +#else + long +#endif + x = 0; + + c = cast_static(char, x); + (void) c; /* Suppress unused-but-set-variable warning. */ + return 0; +} diff --git a/lib/ccan/cast/test/compile_ok-cast_void.c b/lib/ccan/cast/test/compile_ok-cast_void.c new file mode 100644 index 0000000000..c649d283b3 --- /dev/null +++ b/lib/ccan/cast/test/compile_ok-cast_void.c @@ -0,0 +1,12 @@ +#include <ccan/cast/cast.h> + +static void *remove_void(const void *p) +{ + return cast_const(void *, p); +} + +int main(void) +{ + void *p = remove_void("foo"); + return !p; +} diff --git a/lib/ccan/compiler/LICENSE b/lib/ccan/compiler/LICENSE new file mode 100644 index 0000000000..cca7fc278f --- /dev/null +++ b/lib/ccan/compiler/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. 
+ + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/lib/ccan/compiler/_info b/lib/ccan/compiler/_info new file mode 100644 index 0000000000..c55ba22f08 --- /dev/null +++ b/lib/ccan/compiler/_info @@ -0,0 +1,64 @@ +#include <string.h> +#include <stdio.h> +#include "config.h" + +/** + * compiler - macros for common compiler extensions + * + * Abstracts away some compiler hints. Currently these include: + * - COLD + * For functions not called in fast paths (aka. cold functions) + * - PRINTF_FMT + * For functions which take printf-style parameters. + * - IDEMPOTENT + * For functions which return the same value for same parameters. + * - NEEDED + * For functions and variables which must be emitted even if unused. + * - UNNEEDED + * For functions and variables which need not be emitted if unused. + * - UNUSED + * For parameters which are not used. + * - IS_COMPILE_CONSTANT + * For using different tradeoffs for compiletime vs runtime evaluation. + * + * License: LGPL (3 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + * + * Example: + * #include <ccan/compiler/compiler.h> + * #include <stdio.h> + * #include <stdarg.h> + * + * // Example of a (slow-path) logging function. + * static int log_threshold = 2; + * static void COLD PRINTF_FMT(2,3) + * logger(int level, const char *fmt, ...) 
+ * { + * va_list ap; + * va_start(ap, fmt); + * if (level >= log_threshold) + * vfprintf(stderr, fmt, ap); + * va_end(ap); + * } + * + * int main(int argc, char *argv[]) + * { + * if (argc != 1) { + * logger(3, "Don't want %i arguments!\n", argc-1); + * return 1; + * } + * return 0; + * } + */ +int main(int argc, char *argv[]) +{ + /* Expect exactly one argument */ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + return 0; + } + + return 1; +} diff --git a/lib/ccan/compiler/compiler.h b/lib/ccan/compiler/compiler.h new file mode 100644 index 0000000000..74e0f1835c --- /dev/null +++ b/lib/ccan/compiler/compiler.h @@ -0,0 +1,216 @@ +#ifndef CCAN_COMPILER_H +#define CCAN_COMPILER_H +#include "config.h" + +#ifndef COLD +#if HAVE_ATTRIBUTE_COLD +/** + * COLD - a function is unlikely to be called. + * + * Used to mark an unlikely code path and optimize appropriately. + * It is usually used on logging or error routines. + * + * Example: + * static void COLD moan(const char *reason) + * { + * fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno)); + * } + */ +#define COLD __attribute__((cold)) +#else +#define COLD +#endif +#endif + +#ifndef NORETURN +#if HAVE_ATTRIBUTE_NORETURN +/** + * NORETURN - a function does not return + * + * Used to mark a function which exits; useful for suppressing warnings. + * + * Example: + * static void NORETURN fail(const char *reason) + * { + * fprintf(stderr, "Error: %s (%s)\n", reason, strerror(errno)); + * exit(1); + * } + */ +#define NORETURN __attribute__((noreturn)) +#else +#define NORETURN +#endif +#endif + +#ifndef PRINTF_FMT +#if HAVE_ATTRIBUTE_PRINTF +/** + * PRINTF_FMT - a function takes printf-style arguments + * @nfmt: the 1-based number of the function's format argument. + * @narg: the 1-based number of the function's first variable argument. + * + * This allows the compiler to check your parameters as it does for printf(). 
+ * + * Example: + * void PRINTF_FMT(2,3) my_printf(const char *prefix, const char *fmt, ...); + */ +#define PRINTF_FMT(nfmt, narg) \ + __attribute__((format(__printf__, nfmt, narg))) +#else +#define PRINTF_FMT(nfmt, narg) +#endif +#endif + +#ifndef IDEMPOTENT +#if HAVE_ATTRIBUTE_CONST +/** + * IDEMPOTENT - a function's return depends only on its argument + * + * This allows the compiler to assume that the function will return the exact + * same value for the exact same arguments. This implies that the function + * must not use global variables, or dereference pointer arguments. + */ +#define IDEMPOTENT __attribute__((const)) +#else +#define IDEMPOTENT +#endif +#endif + +#if HAVE_ATTRIBUTE_UNUSED +#ifndef UNNEEDED +/** + * UNNEEDED - a variable/function may not be needed + * + * This suppresses warnings about unused variables or functions, but tells + * the compiler that if it is unused it need not emit it into the object code. + * + * Example: + * // With some preprocessor options, this is unnecessary. + * static UNNEEDED int counter; + * + * // With some preprocessor options, this is unnecessary. + * static UNNEEDED void add_to_counter(int add) + * { + * counter += add; + * } + */ +#define UNNEEDED __attribute__((unused)) +#endif + +#ifndef NEEDED +#if HAVE_ATTRIBUTE_USED +/** + * NEEDED - a variable/function is needed + * + * This suppresses warnings about unused variables or functions, but tells + * the compiler that it must exist even if it (seems) unused. + * + * Example: + * // Even if this is unused, these are vital for debugging. + * static NEEDED int counter; + * static NEEDED void dump_counter(void) + * { + * printf("Counter is %i\n", counter); + * } + */ +#define NEEDED __attribute__((used)) +#else +/* Before used, unused functions and vars were always emitted. */ +#define NEEDED __attribute__((unused)) +#endif +#endif + +#ifndef UNUSED +/** + * UNUSED - a parameter is unused + * + * Some compilers (eg. 
gcc with -W or -Wunused) warn about unused + * function parameters. This suppresses such warnings and indicates + * to the reader that it's deliberate. + * + * Example: + * // This is used as a callback, so needs to have this prototype. + * static int some_callback(void *unused UNUSED) + * { + * return 0; + * } + */ +#define UNUSED __attribute__((unused)) +#endif +#else +#ifndef UNNEEDED +#define UNNEEDED +#endif +#ifndef NEEDED +#define NEEDED +#endif +#ifndef UNUSED +#define UNUSED +#endif +#endif + +#ifndef IS_COMPILE_CONSTANT +#if HAVE_BUILTIN_CONSTANT_P +/** + * IS_COMPILE_CONSTANT - does the compiler know the value of this expression? + * @expr: the expression to evaluate + * + * When an expression manipulation is complicated, it is usually better to + * implement it in a function. However, if the expression being manipulated is + * known at compile time, it is better to have the compiler see the entire + * expression so it can simply substitute the result. + * + * This can be done using the IS_COMPILE_CONSTANT() macro. + * + * Example: + * enum greek { ALPHA, BETA, GAMMA, DELTA, EPSILON }; + * + * // Out-of-line version. + * const char *greek_name(enum greek greek); + * + * // Inline version. + * static inline const char *_greek_name(enum greek greek) + * { + * switch (greek) { + * case ALPHA: return "alpha"; + * case BETA: return "beta"; + * case GAMMA: return "gamma"; + * case DELTA: return "delta"; + * case EPSILON: return "epsilon"; + * default: return "**INVALID**"; + * } + * } + * + * // Use inline if compiler knows answer. Otherwise call function + * // to avoid copies of the same code everywhere. + * #define greek_name(g) \ + * (IS_COMPILE_CONSTANT(g) ? _greek_name(g) : greek_name(g)) + */ +#define IS_COMPILE_CONSTANT(expr) __builtin_constant_p(expr) +#else +/* If we don't know, assume it's not. 
*/ +#define IS_COMPILE_CONSTANT(expr) 0 +#endif +#endif + +#ifndef WARN_UNUSED_RESULT +#if HAVE_WARN_UNUSED_RESULT +/** + * WARN_UNUSED_RESULT - warn if a function return value is unused. + * + * Used to mark a function where it is extremely unlikely that the caller + * can ignore the result, eg realloc(). + * + * Example: + * // buf param may be freed by this; need return value! + * static char *WARN_UNUSED_RESULT enlarge(char *buf, unsigned *size) + * { + * return realloc(buf, (*size) *= 2); + * } + */ +#define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +#define WARN_UNUSED_RESULT +#endif +#endif +#endif /* CCAN_COMPILER_H */ diff --git a/lib/ccan/compiler/test/compile_fail-printf.c b/lib/ccan/compiler/test/compile_fail-printf.c new file mode 100644 index 0000000000..8f34ae5a12 --- /dev/null +++ b/lib/ccan/compiler/test/compile_fail-printf.c @@ -0,0 +1,22 @@ +#include <ccan/compiler/compiler.h> + +static void PRINTF_FMT(2,3) my_printf(int x, const char *fmt, ...) +{ +} + +int main(int argc, char *argv[]) +{ + unsigned int i = 0; + + my_printf(1, "Not a pointer " +#ifdef FAIL + "%p", +#if !HAVE_ATTRIBUTE_PRINTF +#error "Unfortunately we don't fail if !HAVE_ATTRIBUTE_PRINTF." 
+#endif +#else + "%i", +#endif + i); + return 0; +} diff --git a/lib/ccan/compiler/test/run-is_compile_constant.c b/lib/ccan/compiler/test/run-is_compile_constant.c new file mode 100644 index 0000000000..a66f2e13e6 --- /dev/null +++ b/lib/ccan/compiler/test/run-is_compile_constant.c @@ -0,0 +1,15 @@ +#include <ccan/compiler/compiler.h> +#include <ccan/tap/tap.h> + +int main(int argc, char *argv[]) +{ + plan_tests(2); + + ok1(!IS_COMPILE_CONSTANT(argc)); +#if HAVE_BUILTIN_CONSTANT_P + ok1(IS_COMPILE_CONSTANT(7)); +#else + pass("If !HAVE_BUILTIN_CONSTANT_P, IS_COMPILE_CONSTANT always false"); +#endif + return exit_status(); +} diff --git a/lib/ccan/endian/LICENSE b/lib/ccan/endian/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/endian/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. 
+ + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. 
We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. 
In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. 
A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. 
You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
diff --git a/lib/ccan/endian/_info b/lib/ccan/endian/_info new file mode 100644 index 0000000000..5d4d65ff74 --- /dev/null +++ b/lib/ccan/endian/_info @@ -0,0 +1,53 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * endian - endian conversion macros for simple types + * + * Portable protocols (such as on-disk formats, or network protocols) + * are often defined to be a particular endian: little-endian (least + * significant bytes first) or big-endian (most significant bytes + * first). + * + * Similarly, some CPUs lay out values in memory in little-endian + * order (most commonly, Intel's 8086 and derivatives), or big-endian + * order (almost everyone else). + * + * This module provides conversion routines, inspired by the linux kernel. + * + * Example: + * #include <stdio.h> + * #include <err.h> + * #include <ccan/endian/endian.h> + * + * // + * int main(int argc, char *argv[]) + * { + * uint32_t value; + * + * if (argc != 2) + * errx(1, "Usage: %s <value>", argv[0]); + * + * value = atoi(argv[1]); + * printf("native: %08x\n", value); + * printf("little-endian: %08x\n", cpu_to_le32(value)); + * printf("big-endian: %08x\n", cpu_to_be32(value)); + * printf("byte-reversed: %08x\n", bswap_32(value)); + * exit(0); + * } + * + * License: LGPL (2 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) + /* Nothing */ + return 0; + + return 1; +} diff --git a/lib/ccan/endian/endian.h b/lib/ccan/endian/endian.h new file mode 100644 index 0000000000..baee60be14 --- /dev/null +++ b/lib/ccan/endian/endian.h @@ -0,0 +1,226 @@ +#ifndef CCAN_ENDIAN_H +#define CCAN_ENDIAN_H +#include <stdint.h> +#include "config.h" + +#if HAVE_BYTESWAP_H +#include <byteswap.h> +#else +/** + * bswap_16 - reverse bytes in a uint16_t value. + * @val: value whose bytes to swap. 
+ * + * Example: + * // Output contains "1024 is 4 as two bytes reversed" + * printf("1024 is %u as two bytes reversed\n", bswap_16(1024)); + */ +static inline uint16_t bswap_16(uint16_t val) +{ + return ((val & (uint16_t)0x00ffU) << 8) + | ((val & (uint16_t)0xff00U) >> 8); +} + +/** + * bswap_32 - reverse bytes in a uint32_t value. + * @val: value whose bytes to swap. + * + * Example: + * // Output contains "1024 is 262144 as four bytes reversed" + * printf("1024 is %u as four bytes reversed\n", bswap_32(1024)); + */ +static inline uint32_t bswap_32(uint32_t val) +{ + return ((val & (uint32_t)0x000000ffUL) << 24) + | ((val & (uint32_t)0x0000ff00UL) << 8) + | ((val & (uint32_t)0x00ff0000UL) >> 8) + | ((val & (uint32_t)0xff000000UL) >> 24); +} +#endif /* !HAVE_BYTESWAP_H */ + +#if !HAVE_BSWAP_64 +/** + * bswap_64 - reverse bytes in a uint64_t value. + * @val: value whose bytes to swap. + * + * Example: + * // Output contains "1024 is 1125899906842624 as eight bytes reversed" + * printf("1024 is %llu as eight bytes reversed\n", + * (unsigned long long)bswap_64(1024)); + */ +static inline uint64_t bswap_64(uint64_t val) +{ + return ((val & (uint64_t)0x00000000000000ffULL) << 56) + | ((val & (uint64_t)0x000000000000ff00ULL) << 40) + | ((val & (uint64_t)0x0000000000ff0000ULL) << 24) + | ((val & (uint64_t)0x00000000ff000000ULL) << 8) + | ((val & (uint64_t)0x000000ff00000000ULL) >> 8) + | ((val & (uint64_t)0x0000ff0000000000ULL) >> 24) + | ((val & (uint64_t)0x00ff000000000000ULL) >> 40) + | ((val & (uint64_t)0xff00000000000000ULL) >> 56); +} +#endif + +/* Sanity check the defines. We don't handle weird endianness. */ +#if !HAVE_LITTLE_ENDIAN && !HAVE_BIG_ENDIAN +#error "Unknown endian" +#elif HAVE_LITTLE_ENDIAN && HAVE_BIG_ENDIAN +#error "Can't compile for both big and little endian." 
+#endif + +/** + * cpu_to_le64 - convert a uint64_t value to little-endian + * @native: value to convert + */ +static inline uint64_t cpu_to_le64(uint64_t native) +{ +#if HAVE_LITTLE_ENDIAN + return native; +#else + return bswap_64(native); +#endif +} + +/** + * cpu_to_le32 - convert a uint32_t value to little-endian + * @native: value to convert + */ +static inline uint32_t cpu_to_le32(uint32_t native) +{ +#if HAVE_LITTLE_ENDIAN + return native; +#else + return bswap_32(native); +#endif +} + +/** + * cpu_to_le16 - convert a uint16_t value to little-endian + * @native: value to convert + */ +static inline uint16_t cpu_to_le16(uint16_t native) +{ +#if HAVE_LITTLE_ENDIAN + return native; +#else + return bswap_16(native); +#endif +} + +/** + * le64_to_cpu - convert a little-endian uint64_t value + * @le_val: little-endian value to convert + */ +static inline uint64_t le64_to_cpu(uint64_t le_val) +{ +#if HAVE_LITTLE_ENDIAN + return le_val; +#else + return bswap_64(le_val); +#endif +} + +/** + * le32_to_cpu - convert a little-endian uint32_t value + * @le_val: little-endian value to convert + */ +static inline uint32_t le32_to_cpu(uint32_t le_val) +{ +#if HAVE_LITTLE_ENDIAN + return le_val; +#else + return bswap_32(le_val); +#endif +} + +/** + * le16_to_cpu - convert a little-endian uint16_t value + * @le_val: little-endian value to convert + */ +static inline uint16_t le16_to_cpu(uint16_t le_val) +{ +#if HAVE_LITTLE_ENDIAN + return le_val; +#else + return bswap_16(le_val); +#endif +} + +/** + * cpu_to_be64 - convert a uint64_t value to big endian. + * @native: value to convert + */ +static inline uint64_t cpu_to_be64(uint64_t native) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_64(native); +#else + return native; +#endif +} + +/** + * cpu_to_be32 - convert a uint32_t value to big endian. 
+ * @native: value to convert + */ +static inline uint32_t cpu_to_be32(uint32_t native) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_32(native); +#else + return native; +#endif +} + +/** + * cpu_to_be16 - convert a uint16_t value to big endian. + * @native: value to convert + */ +static inline uint16_t cpu_to_be16(uint16_t native) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_16(native); +#else + return native; +#endif +} + +/** + * be64_to_cpu - convert a big-endian uint64_t value + * @be_val: big-endian value to convert + */ +static inline uint64_t be64_to_cpu(uint64_t be_val) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_64(be_val); +#else + return be_val; +#endif +} + +/** + * be32_to_cpu - convert a big-endian uint32_t value + * @be_val: big-endian value to convert + */ +static inline uint32_t be32_to_cpu(uint32_t be_val) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_32(be_val); +#else + return be_val; +#endif +} + +/** + * be16_to_cpu - convert a big-endian uint16_t value + * @be_val: big-endian value to convert + */ +static inline uint16_t be16_to_cpu(uint16_t be_val) +{ +#if HAVE_LITTLE_ENDIAN + return bswap_16(be_val); +#else + return be_val; +#endif +} + +#endif /* CCAN_ENDIAN_H */ diff --git a/lib/ccan/endian/test/run.c b/lib/ccan/endian/test/run.c new file mode 100644 index 0000000000..a00fce74e4 --- /dev/null +++ b/lib/ccan/endian/test/run.c @@ -0,0 +1,106 @@ +#include <ccan/endian/endian.h> +#include <stdlib.h> +#include <stddef.h> +#include <ccan/tap/tap.h> + +int main(int argc, char *argv[]) +{ + union { + uint64_t u64; + unsigned char u64_bytes[8]; + } u64; + union { + uint32_t u32; + unsigned char u32_bytes[4]; + } u32; + union { + uint16_t u16; + unsigned char u16_bytes[2]; + } u16; + + plan_tests(48); + + /* Straight swap tests. 
*/ + u64.u64_bytes[0] = 0x00; + u64.u64_bytes[1] = 0x11; + u64.u64_bytes[2] = 0x22; + u64.u64_bytes[3] = 0x33; + u64.u64_bytes[4] = 0x44; + u64.u64_bytes[5] = 0x55; + u64.u64_bytes[6] = 0x66; + u64.u64_bytes[7] = 0x77; + u64.u64 = bswap_64(u64.u64); + ok1(u64.u64_bytes[7] == 0x00); + ok1(u64.u64_bytes[6] == 0x11); + ok1(u64.u64_bytes[5] == 0x22); + ok1(u64.u64_bytes[4] == 0x33); + ok1(u64.u64_bytes[3] == 0x44); + ok1(u64.u64_bytes[2] == 0x55); + ok1(u64.u64_bytes[1] == 0x66); + ok1(u64.u64_bytes[0] == 0x77); + + u32.u32_bytes[0] = 0x00; + u32.u32_bytes[1] = 0x11; + u32.u32_bytes[2] = 0x22; + u32.u32_bytes[3] = 0x33; + u32.u32 = bswap_32(u32.u32); + ok1(u32.u32_bytes[3] == 0x00); + ok1(u32.u32_bytes[2] == 0x11); + ok1(u32.u32_bytes[1] == 0x22); + ok1(u32.u32_bytes[0] == 0x33); + + u16.u16_bytes[0] = 0x00; + u16.u16_bytes[1] = 0x11; + u16.u16 = bswap_16(u16.u16); + ok1(u16.u16_bytes[1] == 0x00); + ok1(u16.u16_bytes[0] == 0x11); + + /* Endian tests. */ + u64.u64 = cpu_to_le64(0x0011223344556677ULL); + ok1(u64.u64_bytes[0] == 0x77); + ok1(u64.u64_bytes[1] == 0x66); + ok1(u64.u64_bytes[2] == 0x55); + ok1(u64.u64_bytes[3] == 0x44); + ok1(u64.u64_bytes[4] == 0x33); + ok1(u64.u64_bytes[5] == 0x22); + ok1(u64.u64_bytes[6] == 0x11); + ok1(u64.u64_bytes[7] == 0x00); + ok1(le64_to_cpu(u64.u64) == 0x0011223344556677ULL); + + u64.u64 = cpu_to_be64(0x0011223344556677ULL); + ok1(u64.u64_bytes[7] == 0x77); + ok1(u64.u64_bytes[6] == 0x66); + ok1(u64.u64_bytes[5] == 0x55); + ok1(u64.u64_bytes[4] == 0x44); + ok1(u64.u64_bytes[3] == 0x33); + ok1(u64.u64_bytes[2] == 0x22); + ok1(u64.u64_bytes[1] == 0x11); + ok1(u64.u64_bytes[0] == 0x00); + ok1(be64_to_cpu(u64.u64) == 0x0011223344556677ULL); + + u32.u32 = cpu_to_le32(0x00112233); + ok1(u32.u32_bytes[0] == 0x33); + ok1(u32.u32_bytes[1] == 0x22); + ok1(u32.u32_bytes[2] == 0x11); + ok1(u32.u32_bytes[3] == 0x00); + ok1(le32_to_cpu(u32.u32) == 0x00112233); + + u32.u32 = cpu_to_be32(0x00112233); + ok1(u32.u32_bytes[3] == 0x33); + 
ok1(u32.u32_bytes[2] == 0x22); + ok1(u32.u32_bytes[1] == 0x11); + ok1(u32.u32_bytes[0] == 0x00); + ok1(be32_to_cpu(u32.u32) == 0x00112233); + + u16.u16 = cpu_to_le16(0x0011); + ok1(u16.u16_bytes[0] == 0x11); + ok1(u16.u16_bytes[1] == 0x00); + ok1(le16_to_cpu(u16.u16) == 0x0011); + + u16.u16 = cpu_to_be16(0x0011); + ok1(u16.u16_bytes[1] == 0x11); + ok1(u16.u16_bytes[0] == 0x00); + ok1(be16_to_cpu(u16.u16) == 0x0011); + + exit(exit_status()); +} diff --git a/lib/ccan/hash/_info b/lib/ccan/hash/_info new file mode 100644 index 0000000000..5aeb912136 --- /dev/null +++ b/lib/ccan/hash/_info @@ -0,0 +1,31 @@ +#include <string.h> +#include <stdio.h> + +/** + * hash - routines for hashing bytes + * + * When creating a hash table it's important to have a hash function + * which mixes well and is fast. This package supplies such functions. + * + * The hash functions come in two flavors: the normal ones and the + * stable ones. The normal ones can vary from machine-to-machine and + * may change if we find better or faster hash algorithms in future. + * The stable ones will always give the same results on any computer, + * and on any version of this package. + * + * License: Public Domain + * Maintainer: Rusty Russell <rusty@rustcorp.com.au> + * Author: Bob Jenkins <bob_jenkins@burtleburtle.net> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/hash/hash.c b/lib/ccan/hash/hash.c new file mode 100644 index 0000000000..0fd6109513 --- /dev/null +++ b/lib/ccan/hash/hash.c @@ -0,0 +1,925 @@ +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hash_word(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. 
Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hash_word(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ +//#define SELF_TEST 1 + +#if 0 +#include <stdio.h> /* defines printf for tests */ +#include <time.h> /* defines time_t for timings in the test */ +#include <stdint.h> /* defines uint32_t etc */ +#include <sys/param.h> /* attempt to define endianness */ + +#ifdef linux +# include <endian.h> /* attempt to define endianness */ +#endif + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment.
+ */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(__x86_64) || \ + defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# error Unknown endian +#endif +#endif /* old hash.c headers. */ + +#include "hash.h" + +#if HAVE_LITTLE_ENDIAN +#define HASH_LITTLE_ENDIAN 1 +#define HASH_BIG_ENDIAN 0 +#elif HAVE_BIG_ENDIAN +#define HASH_LITTLE_ENDIAN 0 +#define HASH_BIG_ENDIAN 1 +#else +#error Unknown endian +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. 
+ +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. 
+* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hash_word() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hash_word() only because + hashlittle() has to dance around fitting the key bytes into registers. 
+-------------------------------------------------------------------- +*/ +uint32_t hash_u32( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + val2 : IN: can be any 4-byte value OUT: second 32 bit hash. +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. Note that the return value is better +mixed than val2, so use that first. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); + +By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. 
You may use this +code any way you wish, private, educational, or commercial. It's free. + +Use for hash table lookup, or anything where one collision in 2^^32 is +acceptable. Do NOT use for cryptographic purposes. +------------------------------------------------------------------------------- +*/ + +static uint32_t hashlittle( const void *key, size_t length, uint32_t *val2 ) +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + const uint8_t *k8; + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). + * + * Not on my testing with gcc 4.5 on an intel i5 CPU, at least --RR.
+ */ +#if 0 + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through 
*/ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + *val2 = b; + return c; +} + +/* + * hashbig(): + * This is the same as hash_word() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. 
+ */ +static uint32_t hashbig( const void *key, size_t length, uint32_t *val2) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *val2; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + const uint8_t *k8; + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticeably faster for short strings (like English words). + * + * Not on my testing with gcc 4.5 on an intel i5 CPU, at least --RR.
+ */ +#if 0 + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + 
+ /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + *val2 = b; + return c; +} + +/* I basically use hashlittle here, but use native endian within each + * element. This delivers least-surprise: hash such as "int arr[] = { + * 1, 2 }; hash_stable(arr, 2, 0);" will be the same on big and little + * endian machines, even though a bytewise hash wouldn't be. */ +uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base) +{ + const uint64_t *k = key; + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)n*8) + (base >> 32) + base; + + while (n > 3) { + a += (uint32_t)k[0]; + b += (uint32_t)(k[0] >> 32); + c += (uint32_t)k[1]; + mix(a,b,c); + a += (uint32_t)(k[1] >> 32); + b += (uint32_t)k[2]; + c += (uint32_t)(k[2] >> 32); + mix(a,b,c); + n -= 3; + k += 3; + } + switch (n) { + case 2: + a += (uint32_t)k[0]; + b += (uint32_t)(k[0] >> 32); + c += (uint32_t)k[1]; + mix(a,b,c); + a += (uint32_t)(k[1] >> 32); + break; + case 1: + a += (uint32_t)k[0]; + b += (uint32_t)(k[0] >> 32); + break; + case 0: + return c; + } + final(a,b,c); + return ((uint64_t)b << 32) | c; +} + +uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base) +{ + const uint32_t *k = key; + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)n*4) + (base >> 32) + base; + + while (n > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + + n -= 3; + k += 3; + } + switch (n) { + case 2: + b 
+= (uint32_t)k[1]; + case 1: + a += (uint32_t)k[0]; + break; + case 0: + return c; + } + final(a,b,c); + return ((uint64_t)b << 32) | c; +} + +uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base) +{ + const uint16_t *k = key; + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)n*2) + (base >> 32) + base; + + while (n > 6) { + a += (uint32_t)k[0] + ((uint32_t)k[1] << 16); + b += (uint32_t)k[2] + ((uint32_t)k[3] << 16); + c += (uint32_t)k[4] + ((uint32_t)k[5] << 16); + mix(a,b,c); + + n -= 6; + k += 6; + } + + switch (n) { + case 5: + c += (uint32_t)k[4]; + case 4: + b += ((uint32_t)k[3] << 16); + case 3: + b += (uint32_t)k[2]; + case 2: + a += ((uint32_t)k[1] << 16); + case 1: + a += (uint32_t)k[0]; + break; + case 0: + return c; + } + final(a,b,c); + return ((uint64_t)b << 32) | c; +} + +uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base) +{ + uint32_t b32 = base + (base >> 32); + uint32_t lower = hashlittle(key, n, &b32); + + return ((uint64_t)b32 << 32) | lower; +} + +uint32_t hash_any(const void *key, size_t length, uint32_t base) +{ + if (HASH_BIG_ENDIAN) + return hashbig(key, length, &base); + else + return hashlittle(key, length, &base); +} + +uint32_t hash_stable_64(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_64(key, n, base); +} + +uint32_t hash_stable_32(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_32(key, n, base); +} + +uint32_t hash_stable_16(const void *key, size_t n, uint32_t base) +{ + return hash64_stable_16(key, n, base); +} + +uint32_t hash_stable_8(const void *key, size_t n, uint32_t base) +{ + return hashlittle(key, n, &base); +} + +/* Jenkins' lookup8 is a 64 bit hash, but he says it's obsolete. Use + * the plain one and recombine into 64 bits. 
*/ +uint64_t hash64_any(const void *key, size_t length, uint64_t base) +{ + uint32_t b32 = base + (base >> 32); + uint32_t lower; + + if (HASH_BIG_ENDIAN) + lower = hashbig(key, length, &b32); + else + lower = hashlittle(key, length, &b32); + + return ((uint64_t)b32 << 32) | lower; +} + +#ifdef SELF_TEST + +/* used for timings */ +void driver1() +{ + uint8_t buf[256]; + uint32_t i; + uint32_t h=0; + time_t a,z; + + time(&a); + for (i=0; i<256; ++i) buf[i] = 'x'; + for (i=0; i<1; ++i) + { + h = hashlittle(&buf[0],1,h); + } + time(&z); + if (z-a > 0) printf("time %d %.8x\n", z-a, h); +} + +/* check that every input bit changes every output bit half the time */ +#define HASHSTATE 1 +#define HASHLEN 1 +#define MAXPAIR 60 +#define MAXLEN 70 +void driver2() +{ + uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; + uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z; + uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; + uint32_t x[HASHSTATE],y[HASHSTATE]; + uint32_t hlen; + + printf("No more than %d trials should ever be needed \n",MAXPAIR/2); + for (hlen=0; hlen < MAXLEN; ++hlen) + { + z=0; + for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */ + { + for (j=0; j<8; ++j) /*------------------------ for each input bit, */ + { + for (m=1; m<8; ++m) /*------------ for several possible initvals, */ + { + for (l=0; l<HASHSTATE; ++l) + e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0); + + /*---- check that every output bit is affected by that input bit */ + for (k=0; k<MAXPAIR; k+=2) + { + uint32_t finished=1; + /* keys have one bit different */ + for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;} + /* have a and b be two keys differing in only one bit */ + a[i] ^= (k<<j); + a[i] ^= (k>>(8-j)); + c[0] = hashlittle(a, hlen, m); + b[i] ^= ((k+1)<<j); + b[i] ^= ((k+1)>>(8-j)); + d[0] = hashlittle(b, hlen, m); + /* check every bit is 1, 0, set, and not set at least once */ + for (l=0; l<HASHSTATE; ++l) + { + e[l] &= (c[l]^d[l]); + 
f[l] &= ~(c[l]^d[l]); + g[l] &= c[l]; + h[l] &= ~c[l]; + x[l] &= d[l]; + y[l] &= ~d[l]; + if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0; + } + if (finished) break; + } + if (k>z) z=k; + if (k==MAXPAIR) + { + printf("Some bit didn't change: "); + printf("%.8x %.8x %.8x %.8x %.8x %.8x ", + e[0],f[0],g[0],h[0],x[0],y[0]); + printf("i %d j %d m %d len %d\n", i, j, m, hlen); + } + if (z==MAXPAIR) goto done; + } + } + } + done: + if (z < MAXPAIR) + { + printf("Mix success %2d bytes %2d initvals ",i,m); + printf("required %d trials\n", z/2); + } + } + printf("\n"); +} + +/* Check for reading beyond the end of the buffer and alignment problems */ +void driver3() +{ + uint8_t buf[MAXLEN+20], *b; + uint32_t len; + uint8_t q[] = "This is the time for all good men to come to the aid of their country..."; + uint32_t h; + uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country..."; + uint32_t i; + uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country..."; + uint32_t j; + uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country..."; + uint32_t ref,x,y; + uint8_t *p; + + printf("Endianness. 
These lines should all be the same (for values filled in):\n"); + printf("%.8x %.8x %.8x\n", + hash_word((const uint32_t *)q, (sizeof(q)-1)/4, 13), + hash_word((const uint32_t *)q, (sizeof(q)-5)/4, 13), + hash_word((const uint32_t *)q, (sizeof(q)-9)/4, 13)); + p = q; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qq[1]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqq[2]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqqq[3]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, 
sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + printf("\n"); + + /* check that hashlittle2 and hashlittle produce the same results */ + i=47; j=0; + hashlittle2(q, sizeof(q), &i, &j); + if (hashlittle(q, sizeof(q), 47) != i) + printf("hashlittle2 and hashlittle mismatch\n"); + + /* check that hash_word2 and hash_word produce the same results */ + len = 0xdeadbeef; + i=47, j=0; + hash_word2(&len, 1, &i, &j); + if (hash_word(&len, 1, 47) != i) + printf("hash_word2 and hash_word mismatch %x %x\n", + i, hash_word(&len, 1, 47)); + + /* check hashlittle doesn't read before or after the ends of the string */ + for (h=0, b=buf+1; h<8; ++h, ++b) + { + for (i=0; i<MAXLEN; ++i) + { + len = i; + for (j=0; j<i; ++j) *(b+j)=0; + + /* these should all be equal */ + ref = hashlittle(b, len, (uint32_t)1); + *(b+i)=(uint8_t)~0; + *(b-1)=(uint8_t)~0; + x = hashlittle(b, len, (uint32_t)1); + y = hashlittle(b, len, (uint32_t)1); + if ((ref != x) || (ref != y)) + { + printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y, + h, i); + } + } + } +} + +/* check for problems with nulls */ + void driver4() +{ + uint8_t buf[1]; + uint32_t h,i,state[HASHSTATE]; + + + buf[0] = ~0; + for (i=0; i<HASHSTATE; ++i) state[i] = 1; + printf("These should all be different\n"); + for (i=0, h=0; i<8; ++i) + { + h = hashlittle(buf, 0, h); + printf("%2ld 0-byte strings, hash is %.8x\n", i, h); + } +} + + +int main() +{ + driver1(); /* test that the key is hashed: used for timings */ + driver2(); /* test that whole key is hashed thoroughly */ + driver3(); /* test that nothing but the key is hashed */ + driver4(); /* test hashing multiple buffers (all buffers are null) */ + return 1; +} + +#endif /* SELF_TEST */ diff --git a/lib/ccan/hash/hash.h b/lib/ccan/hash/hash.h new file mode 100644 index 0000000000..5025c0d748 --- /dev/null +++ 
b/lib/ccan/hash/hash.h @@ -0,0 +1,312 @@ +#ifndef CCAN_HASH_H +#define CCAN_HASH_H +#include "config.h" +#include <stdint.h> +#include <stdlib.h> +#include <ccan/build_assert/build_assert.h> + +/* Stolen mostly from: lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * http://burtleburtle.net/bob/c/lookup3.c + */ + +/** + * hash - fast hash of an array for internal use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The memory region pointed to by p is combined with the base to form + * a 32-bit hash. + * + * This hash will have different results on different machines, so is + * only useful for internal hashes (ie. not hashes sent across the + * network or saved to disk). + * + * It may also change with future versions: it could even detect at runtime + * what the fastest hash to use is. + * + * See also: hash64, hash_stable. + * + * Example: + * #include <ccan/hash/hash.h> + * #include <err.h> + * #include <stdio.h> + * #include <string.h> + * + * // Simple demonstration: identical strings will have the same hash, but + * // two different strings will probably not. + * int main(int argc, char *argv[]) + * { + * uint32_t hash1, hash2; + * + * if (argc != 3) + * err(1, "Usage: %s <string1> <string2>", argv[0]); + * + * hash1 = hash(argv[1], strlen(argv[1]), 0); + * hash2 = hash(argv[2], strlen(argv[2]), 0); + * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different"); + * return 0; + * } + */ +#define hash(p, num, base) hash_any((p), (num)*sizeof(*(p)), (base)) + +/** + * hash_stable - hash of an array for external use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The array of simple integer types pointed to by p is combined with + * the base to form a 32-bit hash. 
+ * + * This hash will have the same results on different machines, so can + * be used for external hashes (ie. hashes sent across the network or + * saved to disk). The results will not change in future versions of + * this module. + * + * Note that it is only legal to hand an array of simple integer types + * to this hash (ie. char, uint16_t, int64_t, etc). In these cases, + * the same values will have the same hash result, even though the + * memory representations of integers depend on the machine + * endianness. + * + * See also: + * hash64_stable + * + * Example: + * #include <ccan/hash/hash.h> + * #include <err.h> + * #include <stdio.h> + * #include <string.h> + * + * int main(int argc, char *argv[]) + * { + * if (argc != 2) + * err(1, "Usage: %s <string-to-hash>", argv[0]); + * + * printf("Hash stable result is %u\n", + * hash_stable(argv[1], strlen(argv[1]), 0)); + * return 0; + * } + */ +#define hash_stable(p, num, base) \ + (BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \ + || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \ + sizeof(*(p)) == 8 ? hash_stable_64((p), (num), (base)) \ + : sizeof(*(p)) == 4 ? hash_stable_32((p), (num), (base)) \ + : sizeof(*(p)) == 2 ? hash_stable_16((p), (num), (base)) \ + : hash_stable_8((p), (num), (base))) + +/** + * hash_u32 - fast hash an array of 32-bit values for internal use + * @key: the array of uint32_t + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The array of uint32_t pointed to by @key is combined with the base + * to form a 32-bit hash. This is 2-3 times faster than hash() on small + * arrays, but the advantage vanishes over large hashes. + * + * This hash will have different results on different machines, so is + * only useful for internal hashes (ie. not hashes sent across the + * network or saved to disk). 
+ */ +uint32_t hash_u32(const uint32_t *key, size_t num, uint32_t base); + +/** + * hash_string - very fast hash of an ascii string + * @string: the nul-terminated string + * + * The string is hashed, using a hash function optimized for ASCII and + * similar strings. It's weaker than the other hash functions. + * + * This hash may have different results on different machines, so is + * only useful for internal hashes (ie. not hashes sent across the + * network or saved to disk). The results will be different from the + * other hash functions in this module, too. + */ +static inline uint32_t hash_string(const char *string) +{ + /* This is Karl Nelson <kenelson@ece.ucdavis.edu>'s X31 hash. + * It's a little faster than the (much better) lookup3 hash(): 56ns vs + * 84ns on my 2GHz Intel Core Duo 2 laptop for a 10 char string. */ + uint32_t ret; + + for (ret = 0; *string; string++) + ret = (ret << 5) - ret + *string; + + return ret; +} + +/** + * hash64 - fast 64-bit hash of an array for internal use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the 64-bit base number to roll into the hash (usually 0) + * + * The memory region pointed to by p is combined with the base to form + * a 64-bit hash. + * + * This hash will have different results on different machines, so is + * only useful for internal hashes (ie. not hashes sent across the + * network or saved to disk). + * + * It may also change with future versions: it could even detect at runtime + * what the fastest hash to use is. + * + * See also: hash. + * + * Example: + * #include <ccan/hash/hash.h> + * #include <err.h> + * #include <stdio.h> + * #include <string.h> + * + * // Simple demonstration: identical strings will have the same hash, but + * // two different strings will probably not. 
+ * int main(int argc, char *argv[]) + * { + * uint64_t hash1, hash2; + * + * if (argc != 3) + * err(1, "Usage: %s <string1> <string2>", argv[0]); + * + * hash1 = hash64(argv[1], strlen(argv[1]), 0); + * hash2 = hash64(argv[2], strlen(argv[2]), 0); + * printf("Hash is %s\n", hash1 == hash2 ? "same" : "different"); + * return 0; + * } + */ +#define hash64(p, num, base) hash64_any((p), (num)*sizeof(*(p)), (base)) + +/** + * hash64_stable - 64 bit hash of an array for external use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * The array of simple integer types pointed to by p is combined with + * the base to form a 64-bit hash. + * + * This hash will have the same results on different machines, so can + * be used for external hashes (ie. hashes sent across the network or + * saved to disk). The results will not change in future versions of + * this module. + * + * Note that it is only legal to hand an array of simple integer types + * to this hash (ie. char, uint16_t, int64_t, etc). In these cases, + * the same values will have the same hash result, even though the + * memory representations of integers depend on the machine + * endianness. + * + * See also: + * hash_stable + * + * Example: + * #include <ccan/hash/hash.h> + * #include <err.h> + * #include <stdio.h> + * #include <string.h> + * + * int main(int argc, char *argv[]) + * { + * if (argc != 2) + * err(1, "Usage: %s <string-to-hash>", argv[0]); + * + * printf("Hash stable result is %llu\n", + * (long long)hash64_stable(argv[1], strlen(argv[1]), 0)); + * return 0; + * } + */ +#define hash64_stable(p, num, base) \ + (BUILD_ASSERT_OR_ZERO(sizeof(*(p)) == 8 || sizeof(*(p)) == 4 \ + || sizeof(*(p)) == 2 || sizeof(*(p)) == 1) + \ + sizeof(*(p)) == 8 ? hash64_stable_64((p), (num), (base)) \ + : sizeof(*(p)) == 4 ? hash64_stable_32((p), (num), (base)) \ + : sizeof(*(p)) == 2 ? 
hash64_stable_16((p), (num), (base)) \ + : hash64_stable_8((p), (num), (base))) + + +/** + * hashl - fast 32/64-bit hash of an array for internal use + * @p: the array or pointer to first element + * @num: the number of elements to hash + * @base: the base number to roll into the hash (usually 0) + * + * This is either hash() or hash64(), on 32/64 bit long machines. + */ +#define hashl(p, num, base) \ + (BUILD_ASSERT_OR_ZERO(sizeof(long) == sizeof(uint32_t) \ + || sizeof(long) == sizeof(uint64_t)) + \ + (sizeof(long) == sizeof(uint64_t) \ + ? hash64((p), (num), (base)) : hash((p), (num), (base)))) + +/* Our underlying operations. */ +uint32_t hash_any(const void *key, size_t length, uint32_t base); +uint32_t hash_stable_64(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_32(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_16(const void *key, size_t n, uint32_t base); +uint32_t hash_stable_8(const void *key, size_t n, uint32_t base); +uint64_t hash64_any(const void *key, size_t length, uint64_t base); +uint64_t hash64_stable_64(const void *key, size_t n, uint64_t base); +uint64_t hash64_stable_32(const void *key, size_t n, uint64_t base); +uint64_t hash64_stable_16(const void *key, size_t n, uint64_t base); +uint64_t hash64_stable_8(const void *key, size_t n, uint64_t base); + +/** + * hash_pointer - hash a pointer for internal use + * @p: the pointer value to hash + * @base: the base number to roll into the hash (usually 0) + * + * The pointer p (not what p points to!) is combined with the base to form + * a 32-bit hash. + * + * This hash will have different results on different machines, so is + * only useful for internal hashes (ie. not hashes sent across the + * network or saved to disk). + * + * Example: + * #include <ccan/hash/hash.h> + * + * // Code to keep track of memory regions. + * struct region { + * struct region *chain; + * void *start; + * unsigned int size; + * }; + * // We keep a simple hash table. 
+ * static struct region *region_hash[128]; + * + * static void add_region(struct region *r) + * { + * unsigned int h = hash_pointer(r->start, 0) % 128; + * + * r->chain = region_hash[h]; + * region_hash[h] = r; + * } + * + * static struct region *find_region(const void *start) + * { + * struct region *r; + * + * for (r = region_hash[hash_pointer(start, 0) % 128]; r; r = r->chain) + * if (r->start == start) + * return r; + * return NULL; + * } + */ +static inline uint32_t hash_pointer(const void *p, uint32_t base) +{ + if (sizeof(p) % sizeof(uint32_t) == 0) { + /* This convoluted union is the right way of aliasing. */ + union { + uint32_t u32[sizeof(p) / sizeof(uint32_t)]; + const void *p; + } u; + u.p = p; + return hash_u32(u.u32, sizeof(p) / sizeof(uint32_t), base); + } else + return hash(&p, 1, base); +} +#endif /* CCAN_HASH_H */ diff --git a/lib/ccan/hash/test/api-hash_stable.c b/lib/ccan/hash/test/api-hash_stable.c new file mode 100644 index 0000000000..bb58d16b18 --- /dev/null +++ b/lib/ccan/hash/test/api-hash_stable.c @@ -0,0 +1,300 @@ +#include <ccan/hash/hash.h> +#include <ccan/tap/tap.h> +#include <stdbool.h> +#include <string.h> + +#define ARRAY_WORDS 5 + +int main(int argc, char *argv[]) +{ + unsigned int i; + uint8_t u8array[ARRAY_WORDS]; + uint16_t u16array[ARRAY_WORDS]; + uint32_t u32array[ARRAY_WORDS]; + uint64_t u64array[ARRAY_WORDS]; + + /* Initialize arrays. */ + for (i = 0; i < ARRAY_WORDS; i++) { + u8array[i] = i; + u16array[i] = i; + u32array[i] = i; + u64array[i] = i; + } + + plan_tests(264); + + /* hash_stable is API-guaranteed. 
*/ + ok1(hash_stable(u8array, ARRAY_WORDS, 0) == 0x1d4833cc); + ok1(hash_stable(u8array, ARRAY_WORDS, 1) == 0x37125e2 ); + ok1(hash_stable(u8array, ARRAY_WORDS, 2) == 0x330a007a); + ok1(hash_stable(u8array, ARRAY_WORDS, 4) == 0x7b0df29b); + ok1(hash_stable(u8array, ARRAY_WORDS, 8) == 0xe7e5d741); + ok1(hash_stable(u8array, ARRAY_WORDS, 16) == 0xaae57471); + ok1(hash_stable(u8array, ARRAY_WORDS, 32) == 0xc55399e5); + ok1(hash_stable(u8array, ARRAY_WORDS, 64) == 0x67f21f7 ); + ok1(hash_stable(u8array, ARRAY_WORDS, 128) == 0x1d795b71); + ok1(hash_stable(u8array, ARRAY_WORDS, 256) == 0xeb961671); + ok1(hash_stable(u8array, ARRAY_WORDS, 512) == 0xc2597247); + ok1(hash_stable(u8array, ARRAY_WORDS, 1024) == 0x3f5c4d75); + ok1(hash_stable(u8array, ARRAY_WORDS, 2048) == 0xe65cf4f9); + ok1(hash_stable(u8array, ARRAY_WORDS, 4096) == 0xf2cd06cb); + ok1(hash_stable(u8array, ARRAY_WORDS, 8192) == 0x443041e1); + ok1(hash_stable(u8array, ARRAY_WORDS, 16384) == 0xdfc618f5); + ok1(hash_stable(u8array, ARRAY_WORDS, 32768) == 0x5e3d5b97); + ok1(hash_stable(u8array, ARRAY_WORDS, 65536) == 0xd5f64730); + ok1(hash_stable(u8array, ARRAY_WORDS, 131072) == 0x372bbecc); + ok1(hash_stable(u8array, ARRAY_WORDS, 262144) == 0x7c194c8d); + ok1(hash_stable(u8array, ARRAY_WORDS, 524288) == 0x16cbb416); + ok1(hash_stable(u8array, ARRAY_WORDS, 1048576) == 0x53e99222); + ok1(hash_stable(u8array, ARRAY_WORDS, 2097152) == 0x6394554a); + ok1(hash_stable(u8array, ARRAY_WORDS, 4194304) == 0xd83a506d); + ok1(hash_stable(u8array, ARRAY_WORDS, 8388608) == 0x7619d9a4); + ok1(hash_stable(u8array, ARRAY_WORDS, 16777216) == 0xfe98e5f6); + ok1(hash_stable(u8array, ARRAY_WORDS, 33554432) == 0x6c262927); + ok1(hash_stable(u8array, ARRAY_WORDS, 67108864) == 0x3f0106fd); + ok1(hash_stable(u8array, ARRAY_WORDS, 134217728) == 0xc91e3a28); + ok1(hash_stable(u8array, ARRAY_WORDS, 268435456) == 0x14229579); + ok1(hash_stable(u8array, ARRAY_WORDS, 536870912) == 0x9dbefa76); + ok1(hash_stable(u8array, ARRAY_WORDS, 
1073741824) == 0xb05c0c78); + ok1(hash_stable(u8array, ARRAY_WORDS, 2147483648U) == 0x88f24d81); + + ok1(hash_stable(u16array, ARRAY_WORDS, 0) == 0xecb5f507); + ok1(hash_stable(u16array, ARRAY_WORDS, 1) == 0xadd666e6); + ok1(hash_stable(u16array, ARRAY_WORDS, 2) == 0xea0f214c); + ok1(hash_stable(u16array, ARRAY_WORDS, 4) == 0xae4051ba); + ok1(hash_stable(u16array, ARRAY_WORDS, 8) == 0x6ed28026); + ok1(hash_stable(u16array, ARRAY_WORDS, 16) == 0xa3917a19); + ok1(hash_stable(u16array, ARRAY_WORDS, 32) == 0xf370f32b); + ok1(hash_stable(u16array, ARRAY_WORDS, 64) == 0x807af460); + ok1(hash_stable(u16array, ARRAY_WORDS, 128) == 0xb4c8cd83); + ok1(hash_stable(u16array, ARRAY_WORDS, 256) == 0xa10cb5b0); + ok1(hash_stable(u16array, ARRAY_WORDS, 512) == 0x8b7d7387); + ok1(hash_stable(u16array, ARRAY_WORDS, 1024) == 0x9e49d1c ); + ok1(hash_stable(u16array, ARRAY_WORDS, 2048) == 0x288830d1); + ok1(hash_stable(u16array, ARRAY_WORDS, 4096) == 0xbe078a43); + ok1(hash_stable(u16array, ARRAY_WORDS, 8192) == 0xa16d5d88); + ok1(hash_stable(u16array, ARRAY_WORDS, 16384) == 0x46839fcd); + ok1(hash_stable(u16array, ARRAY_WORDS, 32768) == 0x9db9bd4f); + ok1(hash_stable(u16array, ARRAY_WORDS, 65536) == 0xedff58f8); + ok1(hash_stable(u16array, ARRAY_WORDS, 131072) == 0x95ecef18); + ok1(hash_stable(u16array, ARRAY_WORDS, 262144) == 0x23c31b7d); + ok1(hash_stable(u16array, ARRAY_WORDS, 524288) == 0x1d85c7d0); + ok1(hash_stable(u16array, ARRAY_WORDS, 1048576) == 0x25218842); + ok1(hash_stable(u16array, ARRAY_WORDS, 2097152) == 0x711d985c); + ok1(hash_stable(u16array, ARRAY_WORDS, 4194304) == 0x85470eca); + ok1(hash_stable(u16array, ARRAY_WORDS, 8388608) == 0x99ed4ceb); + ok1(hash_stable(u16array, ARRAY_WORDS, 16777216) == 0x67b3710c); + ok1(hash_stable(u16array, ARRAY_WORDS, 33554432) == 0x77f1ab35); + ok1(hash_stable(u16array, ARRAY_WORDS, 67108864) == 0x81f688aa); + ok1(hash_stable(u16array, ARRAY_WORDS, 134217728) == 0x27b56ca5); + ok1(hash_stable(u16array, ARRAY_WORDS, 268435456) == 
0xf21ba203); + ok1(hash_stable(u16array, ARRAY_WORDS, 536870912) == 0xd48d1d1 ); + ok1(hash_stable(u16array, ARRAY_WORDS, 1073741824) == 0xa542b62d); + ok1(hash_stable(u16array, ARRAY_WORDS, 2147483648U) == 0xa04c7058); + + ok1(hash_stable(u32array, ARRAY_WORDS, 0) == 0x13305f8c); + ok1(hash_stable(u32array, ARRAY_WORDS, 1) == 0x171abf74); + ok1(hash_stable(u32array, ARRAY_WORDS, 2) == 0x7646fcc7); + ok1(hash_stable(u32array, ARRAY_WORDS, 4) == 0xa758ed5); + ok1(hash_stable(u32array, ARRAY_WORDS, 8) == 0x2dedc2e4); + ok1(hash_stable(u32array, ARRAY_WORDS, 16) == 0x28e2076b); + ok1(hash_stable(u32array, ARRAY_WORDS, 32) == 0xb73091c5); + ok1(hash_stable(u32array, ARRAY_WORDS, 64) == 0x87daf5db); + ok1(hash_stable(u32array, ARRAY_WORDS, 128) == 0xa16dfe20); + ok1(hash_stable(u32array, ARRAY_WORDS, 256) == 0x300c63c3); + ok1(hash_stable(u32array, ARRAY_WORDS, 512) == 0x255c91fc); + ok1(hash_stable(u32array, ARRAY_WORDS, 1024) == 0x6357b26); + ok1(hash_stable(u32array, ARRAY_WORDS, 2048) == 0x4bc5f339); + ok1(hash_stable(u32array, ARRAY_WORDS, 4096) == 0x1301617c); + ok1(hash_stable(u32array, ARRAY_WORDS, 8192) == 0x506792c9); + ok1(hash_stable(u32array, ARRAY_WORDS, 16384) == 0xcd596705); + ok1(hash_stable(u32array, ARRAY_WORDS, 32768) == 0xa8713cac); + ok1(hash_stable(u32array, ARRAY_WORDS, 65536) == 0x94d9794); + ok1(hash_stable(u32array, ARRAY_WORDS, 131072) == 0xac753e8); + ok1(hash_stable(u32array, ARRAY_WORDS, 262144) == 0xcd8bdd20); + ok1(hash_stable(u32array, ARRAY_WORDS, 524288) == 0xd44faf80); + ok1(hash_stable(u32array, ARRAY_WORDS, 1048576) == 0x2547ccbe); + ok1(hash_stable(u32array, ARRAY_WORDS, 2097152) == 0xbab06dbc); + ok1(hash_stable(u32array, ARRAY_WORDS, 4194304) == 0xaac0e882); + ok1(hash_stable(u32array, ARRAY_WORDS, 8388608) == 0x443f48d0); + ok1(hash_stable(u32array, ARRAY_WORDS, 16777216) == 0xdff49fcc); + ok1(hash_stable(u32array, ARRAY_WORDS, 33554432) == 0x9ce0fd65); + ok1(hash_stable(u32array, ARRAY_WORDS, 67108864) == 0x9ddb1def); + 
ok1(hash_stable(u32array, ARRAY_WORDS, 134217728) == 0x86096f25); + ok1(hash_stable(u32array, ARRAY_WORDS, 268435456) == 0xe713b7b5); + ok1(hash_stable(u32array, ARRAY_WORDS, 536870912) == 0x5baeffc5); + ok1(hash_stable(u32array, ARRAY_WORDS, 1073741824) == 0xde874f52); + ok1(hash_stable(u32array, ARRAY_WORDS, 2147483648U) == 0xeca13b4e); + + ok1(hash_stable(u64array, ARRAY_WORDS, 0) == 0x12ef6302); + ok1(hash_stable(u64array, ARRAY_WORDS, 1) == 0xe9aeb406); + ok1(hash_stable(u64array, ARRAY_WORDS, 2) == 0xc4218ceb); + ok1(hash_stable(u64array, ARRAY_WORDS, 4) == 0xb3d11412); + ok1(hash_stable(u64array, ARRAY_WORDS, 8) == 0xdafbd654); + ok1(hash_stable(u64array, ARRAY_WORDS, 16) == 0x9c336cba); + ok1(hash_stable(u64array, ARRAY_WORDS, 32) == 0x65059721); + ok1(hash_stable(u64array, ARRAY_WORDS, 64) == 0x95b5bbe6); + ok1(hash_stable(u64array, ARRAY_WORDS, 128) == 0xe7596b84); + ok1(hash_stable(u64array, ARRAY_WORDS, 256) == 0x503622a2); + ok1(hash_stable(u64array, ARRAY_WORDS, 512) == 0xecdcc5ca); + ok1(hash_stable(u64array, ARRAY_WORDS, 1024) == 0xc40d0513); + ok1(hash_stable(u64array, ARRAY_WORDS, 2048) == 0xaab25e4d); + ok1(hash_stable(u64array, ARRAY_WORDS, 4096) == 0xcc353fb9); + ok1(hash_stable(u64array, ARRAY_WORDS, 8192) == 0x18e2319f); + ok1(hash_stable(u64array, ARRAY_WORDS, 16384) == 0xfddaae8d); + ok1(hash_stable(u64array, ARRAY_WORDS, 32768) == 0xef7976f2); + ok1(hash_stable(u64array, ARRAY_WORDS, 65536) == 0x86359fc9); + ok1(hash_stable(u64array, ARRAY_WORDS, 131072) == 0x8b5af385); + ok1(hash_stable(u64array, ARRAY_WORDS, 262144) == 0x80d4ee31); + ok1(hash_stable(u64array, ARRAY_WORDS, 524288) == 0x42f5f85b); + ok1(hash_stable(u64array, ARRAY_WORDS, 1048576) == 0x9a6920e1); + ok1(hash_stable(u64array, ARRAY_WORDS, 2097152) == 0x7b7c9850); + ok1(hash_stable(u64array, ARRAY_WORDS, 4194304) == 0x69573e09); + ok1(hash_stable(u64array, ARRAY_WORDS, 8388608) == 0xc942bc0e); + ok1(hash_stable(u64array, ARRAY_WORDS, 16777216) == 0x7a89f0f1); + 
ok1(hash_stable(u64array, ARRAY_WORDS, 33554432) == 0x2dd641ca); + ok1(hash_stable(u64array, ARRAY_WORDS, 67108864) == 0x89bbd391); + ok1(hash_stable(u64array, ARRAY_WORDS, 134217728) == 0xbcf88e31); + ok1(hash_stable(u64array, ARRAY_WORDS, 268435456) == 0xfa7a3460); + ok1(hash_stable(u64array, ARRAY_WORDS, 536870912) == 0x49a37be0); + ok1(hash_stable(u64array, ARRAY_WORDS, 1073741824) == 0x1b346394); + ok1(hash_stable(u64array, ARRAY_WORDS, 2147483648U) == 0x6c3a1592); + + ok1(hash64_stable(u8array, ARRAY_WORDS, 0) == 16887282882572727244ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1) == 12032777473133454818ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2) == 18183407363221487738ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4) == 17860764172704150171ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8) == 18076051600675559233ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16) == 9909361918431556721ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 32) == 12937969888744675813ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 64) == 5245669057381736951ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 128) == 4376874646406519665ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 256) == 14219974419871569521ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 512) == 2263415354134458951ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1024) == 4953859694526221685ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2048) == 3432228642067641593ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4096) == 1219647244417697483ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8192) == 7629939424585859553ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16384) == 10041660531376789749ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 32768) == 13859885793922603927ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 65536) == 15069060338344675120ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 131072) == 818163430835601100ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 262144) == 14914314323019517069ULL); + 
ok1(hash64_stable(u8array, ARRAY_WORDS, 524288) == 17518437749769352214ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1048576) == 14920048004901212706ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2097152) == 8758567366332536138ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 4194304) == 6226655736088907885ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 8388608) == 13716650013685832100ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 16777216) == 305325651636315638ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 33554432) == 16784147606583781671ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 67108864) == 16509467555140798205ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 134217728) == 8717281234694060584ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 268435456) == 8098476701725660537ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 536870912) == 16345871539461094006ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 1073741824) == 3755557000429964408ULL); + ok1(hash64_stable(u8array, ARRAY_WORDS, 2147483648U) == 15017348801959710081ULL); + + ok1(hash64_stable(u16array, ARRAY_WORDS, 0) == 1038028831307724039ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1) == 10155473272642627302ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2) == 5714751190106841420ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4) == 3923885607767527866ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8) == 3931017318293995558ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 16) == 1469696588339313177ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 32) == 11522218526952715051ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 64) == 6953517591561958496ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 128) == 7406689491740052867ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 256) == 10101844489704093104ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 512) == 12511348870707245959ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1024) == 1614019938016861468ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 
2048) == 5294796182374592721ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4096) == 16089570706643716675ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8192) == 1689302638424579464ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 16384) == 1446340172370386893ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 32768) == 16535503506744393039ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 65536) == 3496794142527150328ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 131072) == 6568245367474548504ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 262144) == 9487676460765485949ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 524288) == 4519762130966530000ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1048576) == 15623412069215340610ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2097152) == 544013388676438108ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 4194304) == 5594904760290840266ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 8388608) == 18098755780041592043ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 16777216) == 6389168672387330316ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 33554432) == 896986127732419381ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 67108864) == 13232626471143901354ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 134217728) == 53378562890493093ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 268435456) == 10072361400297824771ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 536870912) == 14511948118285144529ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 1073741824) == 6981033484844447277ULL); + ok1(hash64_stable(u16array, ARRAY_WORDS, 2147483648U) == 5619339091684126808ULL); + + ok1(hash64_stable(u32array, ARRAY_WORDS, 0) == 3037571077312110476ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1) == 14732398743825071988ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2) == 14949132158206672071ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4) == 1291370080511561429ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8) == 
10792665964172133092ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16) == 14250138032054339435ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 32) == 17136741522078732741ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 64) == 3260193403318236635ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 128) == 10526616652205653536ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 256) == 9019690373358576579ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 512) == 6997491436599677436ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1024) == 18302783371416533798ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2048) == 10149320644446516025ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4096) == 7073759949410623868ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8192) == 17442399482223760073ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16384) == 2983906194216281861ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 32768) == 4975845419129060524ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 65536) == 594019910205413268ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 131072) == 11903010186073691112ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 262144) == 7339636527154847008ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 524288) == 15243305400579108736ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1048576) == 16737926245392043198ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2097152) == 15725083267699862972ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 4194304) == 12527834265678833794ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 8388608) == 13908436455987824848ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 16777216) == 9672773345173872588ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 33554432) == 2305314279896710501ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 67108864) == 1866733780381408751ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 134217728) == 11906263969465724709ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 268435456) == 5501594918093830069ULL); + 
ok1(hash64_stable(u32array, ARRAY_WORDS, 536870912) == 15823785789276225477ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 1073741824) == 17353000723889475410ULL); + ok1(hash64_stable(u32array, ARRAY_WORDS, 2147483648U) == 7494736910655503182ULL); + + ok1(hash64_stable(u64array, ARRAY_WORDS, 0) == 9765419389786481410ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1) == 11182806172127114246ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2) == 2559155171395472619ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4) == 3311692033324815378ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8) == 1297175419505333844ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16) == 617896928653569210ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 32) == 1517398559958603553ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 64) == 4504821917445110758ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 128) == 1971743331114904452ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 256) == 6177667912354374306ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 512) == 15570521289777792458ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1024) == 9204559632415917331ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2048) == 9008982669760028237ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4096) == 14803537660281700281ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8192) == 2873966517448487327ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16384) == 5859277625928363661ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 32768) == 15520461285618185970ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 65536) == 16746489793331175369ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 131072) == 514952025484227461ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 262144) == 10867212269810675249ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 524288) == 9822204377278314587ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1048576) == 3295088921987850465ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2097152) == 
7559197431498053712ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 4194304) == 1667267269116771849ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 8388608) == 2916804068951374862ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 16777216) == 14422558383125688561ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 33554432) == 10083112683694342602ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 67108864) == 7222777647078298513ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 134217728) == 18424513674048212529ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 268435456) == 14913668581101810784ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 536870912) == 14377721174297902048ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 1073741824) == 6031715005667500948ULL); + ok1(hash64_stable(u64array, ARRAY_WORDS, 2147483648U) == 4827100319722378642ULL); + + return exit_status(); +} diff --git a/lib/ccan/hash/test/run.c b/lib/ccan/hash/test/run.c new file mode 100644 index 0000000000..dad8e86b9e --- /dev/null +++ b/lib/ccan/hash/test/run.c @@ -0,0 +1,176 @@
+#include <ccan/hash/hash.h>
+#include <ccan/tap/tap.h>
+#include <ccan/hash/hash.c>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ARRAY_WORDS 5
+
+/*
+ * Consistency and distribution checks for the hash routines.
+ *
+ * The 39 planned tests are:
+ *   4  hash() on a copy of the input at byte offsets 0..3
+ *   8  lowest/highest bucket count per byte (4) of hash()
+ *  16  lowest/highest bucket count per byte (8) of hash64()
+ *   8  lowest/highest bucket count per byte (4) of hash_pointer()
+ *   1  hashl() against hash() or hash64(), chosen by sizeof(long)
+ *   2  lowest/highest bucket count of the bottom byte of hash_string()
+ *
+ * Returns the value of exit_status().
+ */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j, k;
+	uint32_t array[ARRAY_WORDS], val;
+	char array2[sizeof(array) + sizeof(uint32_t)];
+	uint32_t results[256];
+
+	/* Initialize array. */
+	for (i = 0; i < ARRAY_WORDS; i++)
+		array[i] = i;
+
+	plan_tests(39);
+	/*
+	 * Hash should be the same, indep of memory alignment.
+	 *
+	 * NOTE(review): the check below is "!=" although this comment and
+	 * the test description speak of a match.  array2 is a char array,
+	 * so if hash() scales its count by the element size, this call
+	 * hashes fewer bytes than the uint32_t call above.  Confirm the
+	 * intent against hash.h before changing it.
+	 */
+	val = hash(array, ARRAY_WORDS, 0);
+	for (i = 0; i < sizeof(uint32_t); i++) {
+		memcpy(array2 + i, array, sizeof(array));
+		ok(hash(array2 + i, ARRAY_WORDS, 0) != val,
+		   "hash matched at offset %u", i);
+	}
+
+	/* Hash of random values should have random distribution:
+	 * check one byte at a time. */
+	for (i = 0; i < sizeof(uint32_t); i++) {
+		unsigned int lowest = -1U, highest = 0;
+
+		memset(results, 0, sizeof(results));
+
+		/* 256000 samples into 256 buckets: 1000 each on average. */
+		for (j = 0; j < 256000; j++) {
+			for (k = 0; k < ARRAY_WORDS; k++)
+				array[k] = random();
+			results[(hash(array, ARRAY_WORDS, 0) >> i*8)&0xFF]++;
+		}
+
+		for (j = 0; j < 256; j++) {
+			if (results[j] < lowest)
+				lowest = results[j];
+			if (results[j] > highest)
+				highest = results[j];
+		}
+		/* Expect within 20% */
+		ok(lowest > 800, "Byte %u lowest %u", i, lowest);
+		ok(highest < 1200, "Byte %u highest %u", i, highest);
+		diag("Byte %u, range %u-%u", i, lowest, highest);
+	}
+
+	/* Same distribution check for each byte of the 64-bit hash. */
+	for (i = 0; i < sizeof(uint64_t); i++) {
+		unsigned int lowest = -1U, highest = 0;
+
+		memset(results, 0, sizeof(results));
+
+		for (j = 0; j < 256000; j++) {
+			for (k = 0; k < ARRAY_WORDS; k++)
+				array[k] = random();
+			results[(hash64(array, sizeof(array)/sizeof(uint64_t),
+					0) >> i*8)&0xFF]++;
+		}
+
+		for (j = 0; j < 256; j++) {
+			if (results[j] < lowest)
+				lowest = results[j];
+			if (results[j] > highest)
+				highest = results[j];
+		}
+		/* Expect within 20% */
+		ok(lowest > 800, "Byte %u lowest %u", i, lowest);
+		ok(highest < 1200, "Byte %u highest %u", i, highest);
+		diag("Byte %u, range %u-%u", i, lowest, highest);
+	}
+
+	/* Hash of pointer values should also have random distribution. */
+	for (i = 0; i < sizeof(uint32_t); i++) {
+		unsigned int lowest = -1U, highest = 0;
+		/* p only supplies addresses; it is never dereferenced here. */
+		char *p = malloc(256000);
+
+		memset(results, 0, sizeof(results));
+
+		for (j = 0; j < 256000; j++)
+			results[(hash_pointer(p + j, 0) >> i*8)&0xFF]++;
+		free(p);
+
+		for (j = 0; j < 256; j++) {
+			if (results[j] < lowest)
+				lowest = results[j];
+			if (results[j] > highest)
+				highest = results[j];
+		}
+		/* Expect within 20% */
+		ok(lowest > 800, "hash_pointer byte %u lowest %u", i, lowest);
+		ok(highest < 1200, "hash_pointer byte %u highest %u",
+		   i, highest);
+		diag("hash_pointer byte %u, range %u-%u", i, lowest, highest);
+	}
+
+	/* hashl() is compared with hash() when long is the size of
+	 * uint32_t, otherwise with hash64(). */
+	if (sizeof(long) == sizeof(uint32_t))
+		ok1(hashl(array, ARRAY_WORDS, 0)
+		    == hash(array, ARRAY_WORDS, 0));
+	else
+		ok1(hashl(array, ARRAY_WORDS, 0)
+		    == hash64(array, ARRAY_WORDS, 0));
+
+	/* String hash: weak, so only test bottom byte */
+	for (i = 0; i < 1; i++) {
+		unsigned int cursor, lowest = -1U, highest = 0;
+		char p[5];
+
+		memset(results, 0, sizeof(results));
+
+		memset(p, 'A', sizeof(p));
+		p[sizeof(p)-1] = '\0';
+
+		/*
+		 * Step p through the 4-character strings over 'A'..'z' like
+		 * an odometer (starting after "AAAA"), stopping once every
+		 * position has wrapped back to 'A'.
+		 */
+		for (;;) {
+			for (cursor = 0; cursor < sizeof(p)-1; cursor++) {
+				p[cursor]++;
+				if (p[cursor] <= 'z')
+					break;
+				p[cursor] = 'A';
+			}
+			if (cursor == sizeof(p)-1)
+				break;
+
+			results[(hash_string(p) >> i*8)&0xFF]++;
+		}
+
+		for (j = 0; j < 256; j++) {
+			if (results[j] < lowest)
+				lowest = results[j];
+			if (results[j] > highest)
+				highest = results[j];
+		}
+		/* Expect within 20% */
+		ok(lowest > 35000, "hash_string byte %u lowest %u", i, lowest);
+		ok(highest < 53000, "hash_string byte %u highest %u",
+		   i, highest);
+		diag("hash_string byte %u, range %u-%u", i, lowest, highest);
+	}
+
+	return exit_status();
+}
diff --git a/lib/ccan/htable/LICENSE b/lib/ccan/htable/LICENSE new file mode 100644 index 0000000000..d511905c16 --- /dev/null +++ b/lib/ccan/htable/LICENSE @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. 
+ + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/lib/ccan/htable/_info b/lib/ccan/htable/_info new file mode 100644 index 0000000000..8dabe46a50 --- /dev/null +++ b/lib/ccan/htable/_info @@ -0,0 +1,115 @@ +#include <string.h> +#include <stdio.h> + +/** + * htable - hash table routines + * + * A hash table is an efficient structure for looking up keys. This version + * grows with usage and allows efficient deletion. + * + * Example: + * #include <ccan/htable/htable.h> + * #include <ccan/hash/hash.h> + * #include <stdio.h> + * #include <err.h> + * #include <string.h> + * + * struct name_to_digit { + * const char *name; + * unsigned int val; + * }; + * + * static struct name_to_digit map[] = { + * { "zero", 0}, + * { "one", 1 }, + * { "two", 2 }, + * { "three", 3 }, + * { "four", 4 }, + * { "five", 5 }, + * { "six", 6 }, + * { "seven", 7 }, + * { "eight", 8 }, + * { "nine", 9 } + * }; + * + * // Wrapper for rehash function pointer. + * static size_t rehash(const void *e, void *unused) + * { + * return hash_string(((struct name_to_digit *)e)->name); + * } + * + * // Comparison function. 
+ * static bool streq(const void *e, void *string) + * { + * return strcmp(((struct name_to_digit *)e)->name, string) == 0; + * } + * + * // We let them add their own aliases, eg. --alias=v=5 + * static void add_alias(struct htable *ht, const char *alias) + * { + * char *eq; + * struct name_to_digit *n; + * + * n = malloc(sizeof(*n)); + * n->name = strdup(alias); + * + * eq = strchr(n->name, '='); + * if (!eq || ((n->val = atoi(eq+1)) == 0 && !strcmp(eq+1, "0"))) + * errx(1, "Usage: --alias=<name>=<value>"); + * *eq = '\0'; + * htable_add(ht, hash_string(n->name), n); + * } + * + * int main(int argc, char *argv[]) + * { + * struct htable *ht; + * unsigned int i; + * unsigned long val; + * + * if (argc < 2) + * errx(1, "Usage: %s [--alias=<name>=<val>]... <str>...", + * argv[0]); + * + * // Create and populate hash table. + * ht = htable_new(rehash, NULL); + * for (i = 0; i < sizeof(map)/sizeof(map[0]); i++) + * htable_add(ht, hash_string(map[i].name), &map[i]); + * + * // Add any aliases to the hash table. + * for (i = 1; i < argc; i++) { + * if (!strncmp(argv[i], "--alias=", strlen("--alias="))) + * add_alias(ht, argv[i] + strlen("--alias=")); + * else + * break; + * } + * + * // Find the other args in the hash table. + * for (val = 0; i < argc; i++) { + * struct name_to_digit *n; + * n = htable_get(ht, hash_string(argv[i]), + * streq, argv[i]); + * if (!n) + * errx(1, "Invalid digit name %s", argv[i]); + * // Append it to the value we are building up. 
+ * val *= 10; + * val += n->val; + * } + * printf("%lu\n", val); + * return 0; + * } + * + * License: GPLv2 (or later) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/compiler\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/htable/htable.c b/lib/ccan/htable/htable.c new file mode 100644 index 0000000000..a15c54d795 --- /dev/null +++ b/lib/ccan/htable/htable.c @@ -0,0 +1,290 @@ +#include <ccan/htable/htable.h> +#include <ccan/compiler/compiler.h> +#include <stdint.h> +#include <stdlib.h> +#include <limits.h> +#include <stdbool.h> +#include <assert.h> + +/* This means a struct htable takes at least 512 bytes / 1k (32/64 bits). */ +#define HTABLE_BASE_BITS 7 + +/* We use 0x1 as deleted marker. */ +#define HTABLE_DELETED (0x1) + +struct htable { + size_t (*rehash)(const void *elem, void *priv); + void *priv; + unsigned int bits; + size_t elems, deleted, max, max_with_deleted; + /* These are the bits which are the same in all pointers. */ + uintptr_t common_mask, common_bits; + uintptr_t perfect_bit; + uintptr_t *table; +}; + +/* We clear out the bits which are always the same, and put metadata there. */ +static inline uintptr_t get_extra_ptr_bits(const struct htable *ht, + uintptr_t e) +{ + return e & ht->common_mask; +} + +static inline void *get_raw_ptr(const struct htable *ht, uintptr_t e) +{ + return (void *)((e & ~ht->common_mask) | ht->common_bits); +} + +static inline uintptr_t make_hval(const struct htable *ht, + const void *p, uintptr_t bits) +{ + return ((uintptr_t)p & ~ht->common_mask) | bits; +} + +static inline bool entry_is_valid(uintptr_t e) +{ + return e > HTABLE_DELETED; +} + +static inline uintptr_t get_hash_ptr_bits(const struct htable *ht, + size_t hash) +{ + /* Shuffling the extra bits (as specified in mask) down the + * end is quite expensive. 
But the lower bits are redundant, so + * we fold the value first. */ + return (hash ^ (hash >> ht->bits)) + & ht->common_mask & ~ht->perfect_bit; +} + +struct htable *htable_new(size_t (*rehash)(const void *elem, void *priv), + void *priv) +{ + struct htable *ht = malloc(sizeof(struct htable)); + if (ht) { + ht->bits = HTABLE_BASE_BITS; + ht->rehash = rehash; + ht->priv = priv; + ht->elems = 0; + ht->deleted = 0; + ht->max = ((size_t)1 << ht->bits) * 3 / 4; + ht->max_with_deleted = ((size_t)1 << ht->bits) * 9 / 10; + /* This guarantees we enter update_common first add. */ + ht->common_mask = -1; + ht->common_bits = 0; + ht->perfect_bit = 0; + ht->table = calloc(1 << ht->bits, sizeof(uintptr_t)); + if (!ht->table) { + free(ht); + ht = NULL; + } + } + return ht; +} + +void htable_free(const struct htable *ht) +{ + free((void *)ht->table); + free((void *)ht); +} + +static size_t hash_bucket(const struct htable *ht, size_t h) +{ + return h & ((1 << ht->bits)-1); +} + +static void *htable_val(const struct htable *ht, + struct htable_iter *i, size_t hash, uintptr_t perfect) +{ + uintptr_t h2 = get_hash_ptr_bits(ht, hash) | perfect; + + while (ht->table[i->off]) { + if (ht->table[i->off] != HTABLE_DELETED) { + if (get_extra_ptr_bits(ht, ht->table[i->off]) == h2) + return get_raw_ptr(ht, ht->table[i->off]); + } + i->off = (i->off + 1) & ((1 << ht->bits)-1); + h2 &= ~perfect; + } + return NULL; +} + +void *htable_firstval(const struct htable *ht, + struct htable_iter *i, size_t hash) +{ + i->off = hash_bucket(ht, hash); + return htable_val(ht, i, hash, ht->perfect_bit); +} + +void *htable_nextval(const struct htable *ht, + struct htable_iter *i, size_t hash) +{ + i->off = (i->off + 1) & ((1 << ht->bits)-1); + return htable_val(ht, i, hash, 0); +} + +void *htable_first(const struct htable *ht, struct htable_iter *i) +{ + for (i->off = 0; i->off < (size_t)1 << ht->bits; i->off++) { + if (entry_is_valid(ht->table[i->off])) + return get_raw_ptr(ht, ht->table[i->off]); + } + 
return NULL; +} + +void *htable_next(const struct htable *ht, struct htable_iter *i) +{ + for (i->off++; i->off < (size_t)1 << ht->bits; i->off++) { + if (entry_is_valid(ht->table[i->off])) + return get_raw_ptr(ht, ht->table[i->off]); + } + return NULL; +} + +/* This does not expand the hash table, that's up to caller. */ +static void ht_add(struct htable *ht, const void *new, size_t h) +{ + size_t i; + uintptr_t perfect = ht->perfect_bit; + + i = hash_bucket(ht, h); + + while (entry_is_valid(ht->table[i])) { + perfect = 0; + i = (i + 1) & ((1 << ht->bits)-1); + } + ht->table[i] = make_hval(ht, new, get_hash_ptr_bits(ht, h)|perfect); +} + +static COLD bool double_table(struct htable *ht) +{ + unsigned int i; + size_t oldnum = (size_t)1 << ht->bits; + uintptr_t *oldtable, e; + + oldtable = ht->table; + ht->table = calloc(1 << (ht->bits+1), sizeof(size_t)); + if (!ht->table) { + ht->table = oldtable; + return false; + } + ht->bits++; + ht->max *= 2; + ht->max_with_deleted *= 2; + + /* If we lost our "perfect bit", get it back now. */ + if (!ht->perfect_bit && ht->common_mask) { + for (i = 0; i < sizeof(ht->common_mask) * CHAR_BIT; i++) { + if (ht->common_mask & ((size_t)1 << i)) { + ht->perfect_bit = (size_t)1 << i; + break; + } + } + } + + for (i = 0; i < oldnum; i++) { + if (entry_is_valid(e = oldtable[i])) { + void *p = get_raw_ptr(ht, e); + ht_add(ht, p, ht->rehash(p, ht->priv)); + } + } + ht->deleted = 0; + free(oldtable); + return true; +} + +static COLD void rehash_table(struct htable *ht) +{ + size_t start, i; + uintptr_t e; + + /* Beware wrap cases: we need to start from first empty bucket. 
*/ + for (start = 0; ht->table[start]; start++); + + for (i = 0; i < (size_t)1 << ht->bits; i++) { + size_t h = (i + start) & ((1 << ht->bits)-1); + e = ht->table[h]; + if (!e) + continue; + if (e == HTABLE_DELETED) + ht->table[h] = 0; + else if (!(e & ht->perfect_bit)) { + void *p = get_raw_ptr(ht, e); + ht->table[h] = 0; + ht_add(ht, p, ht->rehash(p, ht->priv)); + } + } + ht->deleted = 0; +} + +/* We stole some bits, now we need to put them back... */ +static COLD void update_common(struct htable *ht, const void *p) +{ + unsigned int i; + uintptr_t maskdiff, bitsdiff; + + if (ht->elems == 0) { + ht->common_mask = -1; + ht->common_bits = (uintptr_t)p; + ht->perfect_bit = 1; + return; + } + + /* Find bits which are unequal to old common set. */ + maskdiff = ht->common_bits ^ ((uintptr_t)p & ht->common_mask); + + /* These are the bits which go there in existing entries. */ + bitsdiff = ht->common_bits & maskdiff; + + for (i = 0; i < (size_t)1 << ht->bits; i++) { + if (!entry_is_valid(ht->table[i])) + continue; + /* Clear the bits no longer in the mask, set them as + * expected. */ + ht->table[i] &= ~maskdiff; + ht->table[i] |= bitsdiff; + } + + /* Take away those bits from our mask, bits and perfect bit. 
*/ + ht->common_mask &= ~maskdiff; + ht->common_bits &= ~maskdiff; + ht->perfect_bit &= ~maskdiff; +} + +bool htable_add(struct htable *ht, size_t hash, const void *p) +{ + if (ht->elems+1 > ht->max && !double_table(ht)) + return false; + if (ht->elems+1 + ht->deleted > ht->max_with_deleted) + rehash_table(ht); + assert(p); + if (((uintptr_t)p & ht->common_mask) != ht->common_bits) + update_common(ht, p); + + ht_add(ht, p, hash); + ht->elems++; + return true; +} + +bool htable_del(struct htable *ht, size_t h, const void *p) +{ + struct htable_iter i; + void *c; + + for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) { + if (c == p) { + htable_delval(ht, &i); + return true; + } + } + return false; +} + +void htable_delval(struct htable *ht, struct htable_iter *i) +{ + assert(i->off < (size_t)1 << ht->bits); + assert(entry_is_valid(ht->table[i->off])); + + ht->elems--; + ht->table[i->off] = HTABLE_DELETED; + ht->deleted++; +} diff --git a/lib/ccan/htable/htable.h b/lib/ccan/htable/htable.h new file mode 100644 index 0000000000..b68442972c --- /dev/null +++ b/lib/ccan/htable/htable.h @@ -0,0 +1,138 @@ +#ifndef CCAN_HTABLE_H +#define CCAN_HTABLE_H +#include "config.h" +#include <stdbool.h> +#include <stdlib.h> + +struct htable; + +/** + * htable_new - allocate a hash tree. + * @rehash: hash function to use for rehashing. + * @priv: private argument to @rehash function. + */ +struct htable *htable_new(size_t (*hash)(const void *elem, void *priv), + void *priv); + +/** + * htable_free - dellocate a hash tree. + * + * This doesn't do anything to any pointers left in it. + */ +void htable_free(const struct htable *); + +/** + * htable_rehash - use a hashtree's rehash function + * @elem: the argument to rehash() + * + */ +size_t htable_rehash(const void *elem); + +/** + * htable_add - add a pointer into a hash tree. 
+ * @ht: the htable + * @hash: the hash value of the object + * @p: the non-NULL pointer + * + * Also note that this can only fail due to allocation failure. Otherwise, it + * returns true. + */ +bool htable_add(struct htable *ht, size_t hash, const void *p); + +/** + * htable_del - remove a pointer from a hash tree + * @ht: the htable + * @hash: the hash value of the object + * @p: the pointer + * + * Returns true if the pointer was found (and deleted). + */ +bool htable_del(struct htable *ht, size_t hash, const void *p); + +/** + * struct htable_iter - iterator or htable_first or htable_firstval etc. + * + * This refers to a location inside the hashtable. + */ +struct htable_iter { + size_t off; +}; + +/** + * htable_firstval - find a candidate for a given hash value + * @htable: the hashtable + * @i: the struct htable_iter to initialize + * @hash: the hash value + * + * You'll need to check the value is what you want; returns NULL if none. + * See Also: + * htable_delval() + */ +void *htable_firstval(const struct htable *htable, + struct htable_iter *i, size_t hash); + +/** + * htable_nextval - find another candidate for a given hash value + * @htable: the hashtable + * @i: the struct htable_iter to initialize + * @hash: the hash value + * + * You'll need to check the value is what you want; returns NULL if no more. + */ +void *htable_nextval(const struct htable *htable, + struct htable_iter *i, size_t hash); + +/** + * htable_get - find an entry in the hash table + * @ht: the hashtable + * @h: the hash value of the entry + * @cmp: the comparison function + * @ptr: the pointer to hand to the comparison function. + * + * Convenient inline wrapper for htable_firstval/htable_nextval loop. 
+ */ +static inline void *htable_get(const struct htable *ht, + size_t h, + bool (*cmp)(const void *candidate, void *ptr), + const void *ptr) +{ + struct htable_iter i; + void *c; + + for (c = htable_firstval(ht,&i,h); c; c = htable_nextval(ht,&i,h)) { + if (cmp(c, (void *)ptr)) + return c; + } + return NULL; +} + +/** + * htable_first - find an entry in the hash table + * @ht: the hashtable + * @i: the struct htable_iter to initialize + * + * Get an entry in the hashtable; NULL if empty. + */ +void *htable_first(const struct htable *htable, struct htable_iter *i); + +/** + * htable_next - find another entry in the hash table + * @ht: the hashtable + * @i: the struct htable_iter to use + * + * Get another entry in the hashtable; NULL if all done. + * This is usually used after htable_first or prior non-NULL htable_next. + */ +void *htable_next(const struct htable *htable, struct htable_iter *i); + +/** + * htable_delval - remove an iterated pointer from a hash tree + * @ht: the htable + * @i: the htable_iter + * + * Usually used to delete a hash entry after it has been found with + * htable_firstval etc. + */ +void htable_delval(struct htable *ht, struct htable_iter *i); + +#endif /* CCAN_HTABLE_H */ diff --git a/lib/ccan/htable/htable_type.h b/lib/ccan/htable/htable_type.h new file mode 100644 index 0000000000..0d9e3fbb2d --- /dev/null +++ b/lib/ccan/htable/htable_type.h @@ -0,0 +1,97 @@ +#ifndef CCAN_HTABLE_TYPE_H +#define CCAN_HTABLE_TYPE_H +#include <ccan/htable/htable.h> +#include "config.h" + +/** + * HTABLE_DEFINE_TYPE - create a set of htable ops for a type + * @type: a type whose pointers will be values in the hash. + * @keyof: a function/macro to extract a key from a @type element. + * @hashfn: a hash function for a @key + * @cmpfn: a comparison function for two keyof()s. + * @name: a name for all the functions to define (of form htable_<name>_*) + * + * NULL values may not be placed into the hash table. 
+ * + * The following wrapper functions are defined; each one is a + * simplified version of the htable.h equivalent: + * + * // Creating and freeing. + * struct htable_@name *htable_@name_new(void); + * void htable_@name_free(const struct htable_@name *ht); + * + * // Add, delete and find. + * bool htable_@name_add(struct htable_@name *ht, const type *e); + * bool htable_@name_del(struct htable_@name *ht, const type *e); + * bool htable_@name_delkey(struct htable_@name *ht, const ktype *k); + * type *htable_@name_get(const struct htable_@name *ht, const ktype *k); + * + * // Iteration. + * struct htable_@name_iter; + * type *htable_@name_first(const struct htable_@name *ht, + * struct htable_@name_iter *i); + * type *htable_@name_next(const struct htable_@name *ht, + * struct htable_@name_iter *i); + */ +#define HTABLE_DEFINE_TYPE(type, keyof, hashfn, cmpfn, name) \ +struct htable_##name; \ +struct htable_##name##_iter { struct htable_iter i; }; \ +static inline size_t htable_##name##_hash(const void *elem, void *priv) \ +{ \ + return hashfn(keyof((const type *)elem)); \ +} \ +static inline struct htable_##name *htable_##name##_new(void) \ +{ \ + return (struct htable_##name *)htable_new(htable_##name##_hash, \ + NULL); \ +} \ +static inline void htable_##name##_free(const struct htable_##name *ht) \ +{ \ + htable_free((const struct htable *)ht); \ +} \ +static inline bool htable_##name##_add(struct htable_##name *ht, \ + const type *elem) \ +{ \ + return htable_add((struct htable *)ht, hashfn(keyof(elem)), elem); \ +} \ +static inline bool htable_##name##_del(const struct htable_##name *ht, \ + const type *elem) \ +{ \ + return htable_del((struct htable *)ht, hashfn(keyof(elem)), elem); \ +} \ +static inline type *htable_##name##_get(const struct htable_##name *ht, \ + const HTABLE_KTYPE(keyof) k) \ +{ \ + /* Typecheck for cmpfn */ \ + (void)sizeof(cmpfn((const type *)NULL, \ + keyof((const type *)NULL))); \ + return (type *)htable_get((const struct htable *)ht, 
\ + hashfn(k), \ + (bool (*)(const void *, void *))(cmpfn), \ + k); \ +} \ +static inline bool htable_##name##_delkey(struct htable_##name *ht, \ + const HTABLE_KTYPE(keyof) k) \ +{ \ + type *elem = htable_##name##_get(ht, k); \ + if (elem) \ + return htable_##name##_del(ht, elem); \ + return false; \ +} \ +static inline type *htable_##name##_first(const struct htable_##name *ht, \ + struct htable_##name##_iter *iter) \ +{ \ + return htable_first((const struct htable *)ht, &iter->i); \ +} \ +static inline type *htable_##name##_next(const struct htable_##name *ht, \ + struct htable_##name##_iter *iter) \ +{ \ + return htable_next((const struct htable *)ht, &iter->i); \ +} + +#if HAVE_TYPEOF +#define HTABLE_KTYPE(keyof) typeof(keyof(NULL)) +#else +#define HTABLE_KTYPE(keyof) void * +#endif +#endif /* CCAN_HTABLE_TYPE_H */ diff --git a/lib/ccan/htable/test/run-type.c b/lib/ccan/htable/test/run-type.c new file mode 100644 index 0000000000..02dac29e10 --- /dev/null +++ b/lib/ccan/htable/test/run-type.c @@ -0,0 +1,176 @@ +#include <ccan/htable/htable_type.h> +#include <ccan/htable/htable.c> +#include <ccan/tap/tap.h> +#include <stdbool.h> +#include <string.h> + +#define NUM_VALS (1 << HTABLE_BASE_BITS) + +struct obj { + /* Makes sure we don't try to treat and obj as a key or vice versa */ + unsigned char unused; + unsigned int key; +}; + +static const unsigned int *objkey(const struct obj *obj) +{ + return &obj->key; +} + +/* We use the number divided by two as the hash (for lots of + collisions), plus set all the higher bits so we can detect if they + don't get masked out. 
*/ +static size_t objhash(const unsigned int *key) +{ + size_t h = *key / 2; + h |= -1UL << HTABLE_BASE_BITS; + return h; +} + +static bool cmp(const struct obj *obj, const unsigned int *key) +{ + return obj->key == *key; +} + +HTABLE_DEFINE_TYPE(struct obj, objkey, objhash, cmp, obj); + +static void add_vals(struct htable_obj *ht, + struct obj val[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (htable_obj_get(ht, &i)) { + fail("%u already in hash", i); + return; + } + htable_obj_add(ht, &val[i]); + if (htable_obj_get(ht, &i) != &val[i]) { + fail("%u not added to hash", i); + return; + } + } + pass("Added %u numbers to hash", i); +} + +static void find_vals(const struct htable_obj *ht, + const struct obj val[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (htable_obj_get(ht, &i) != &val[i]) { + fail("%u not found in hash", i); + return; + } + } + pass("Found %u numbers in hash", i); +} + +static void del_vals(struct htable_obj *ht, + const struct obj val[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (!htable_obj_delkey(ht, &val[i].key)) { + fail("%u not deleted from hash", i); + return; + } + } + pass("Deleted %u numbers in hash", i); +} + +static void del_vals_bykey(struct htable_obj *ht, + const struct obj val[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (!htable_obj_delkey(ht, &i)) { + fail("%u not deleted by key from hash", i); + return; + } + } + pass("Deleted %u numbers by key from hash", i); +} + +static bool check_mask(struct htable *ht, const struct obj val[], unsigned num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits) + return false; + } + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct htable_obj *ht; + struct obj val[NUM_VALS]; + unsigned int dne; + void *p; + struct htable_obj_iter iter; + + plan_tests(20); + for (i = 0; i < 
NUM_VALS; i++) + val[i].key = i; + dne = i; + + ht = htable_obj_new(); + ok1(((struct htable *)ht)->max < (1 << ((struct htable *)ht)->bits)); + ok1(((struct htable *)ht)->bits == HTABLE_BASE_BITS); + + /* We cannot find an entry which doesn't exist. */ + ok1(!htable_obj_get(ht, &dne)); + + /* Fill it, it should increase in size (once). */ + add_vals(ht, val, NUM_VALS); + ok1(((struct htable *)ht)->bits == HTABLE_BASE_BITS + 1); + ok1(((struct htable *)ht)->max < (1 << ((struct htable *)ht)->bits)); + + /* Mask should be set. */ + ok1(((struct htable *)ht)->common_mask != 0); + ok1(((struct htable *)ht)->common_mask != -1); + ok1(check_mask((struct htable *)ht, val, NUM_VALS)); + + /* Find all. */ + find_vals(ht, val, NUM_VALS); + ok1(!htable_obj_get(ht, &dne)); + + /* Walk once, should get them all. */ + i = 0; + for (p = htable_obj_first(ht,&iter); p; p = htable_obj_next(ht, &iter)) + i++; + ok1(i == NUM_VALS); + + /* Delete all. */ + del_vals(ht, val, NUM_VALS); + ok1(!htable_obj_get(ht, &val[0].key)); + + /* Worst case, a "pointer" which doesn't have any matching bits. */ + htable_add((struct htable *)ht, 0, + (void *)~(uintptr_t)&val[NUM_VALS-1]); + htable_obj_add(ht, &val[NUM_VALS-1]); + ok1(((struct htable *)ht)->common_mask == 0); + ok1(((struct htable *)ht)->common_bits == 0); + /* Delete the bogus one before we trip over it. */ + htable_del((struct htable *)ht, 0, + (void *)~(uintptr_t)&val[NUM_VALS-1]); + + /* Add the rest. */ + add_vals(ht, val, NUM_VALS-1); + + /* Check we can find them all. */ + find_vals(ht, val, NUM_VALS); + ok1(!htable_obj_get(ht, &dne)); + + /* Delete them all by key. 
*/ + del_vals_bykey(ht, val, NUM_VALS); + htable_obj_free(ht); + + return exit_status(); +} diff --git a/lib/ccan/htable/test/run.c b/lib/ccan/htable/test/run.c new file mode 100644 index 0000000000..ece46a0fd7 --- /dev/null +++ b/lib/ccan/htable/test/run.c @@ -0,0 +1,176 @@ +#include <ccan/htable/htable.h> +#include <ccan/htable/htable.c> +#include <ccan/tap/tap.h> +#include <stdbool.h> +#include <string.h> + +#define NUM_VALS (1 << HTABLE_BASE_BITS) + +/* We use the number divided by two as the hash (for lots of + collisions), plus set all the higher bits so we can detect if they + don't get masked out. */ +static size_t hash(const void *elem, void *unused) +{ + size_t h = *(uint64_t *)elem / 2; + h |= -1UL << HTABLE_BASE_BITS; + return h; +} + +static bool objcmp(const void *htelem, void *cmpdata) +{ + return *(uint64_t *)htelem == *(uint64_t *)cmpdata; +} + +static void add_vals(struct htable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (htable_get(ht, hash(&i, NULL), objcmp, &i)) { + fail("%llu already in hash", (long long)i); + return; + } + htable_add(ht, hash(&val[i], NULL), &val[i]); + if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { + fail("%llu not added to hash", (long long)i); + return; + } + } + pass("Added %llu numbers to hash", (long long)i); +} + +#if 0 +static void refill_vals(struct htable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (htable_get(ht, hash(&i, NULL), objcmp, &i)) + continue; + htable_add(ht, hash(&val[i], NULL), &val[i]); + } +} +#endif + +static void find_vals(struct htable *ht, + const uint64_t val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (htable_get(ht, hash(&i, NULL), objcmp, &i) != &val[i]) { + fail("%llu not found in hash", (long long)i); + return; + } + } + pass("Found %llu numbers in hash", (long long)i); +} + +static void del_vals(struct htable *ht, + const uint64_t 
val[], unsigned int num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (!htable_del(ht, hash(&val[i], NULL), &val[i])) { + fail("%llu not deleted from hash", (long long)i); + return; + } + } + pass("Deleted %llu numbers in hash", (long long)i); +} + +static bool check_mask(struct htable *ht, uint64_t val[], unsigned num) +{ + uint64_t i; + + for (i = 0; i < num; i++) { + if (((uintptr_t)&val[i] & ht->common_mask) != ht->common_bits) + return false; + } + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + uintptr_t perfect_bit; + struct htable *ht; + uint64_t val[NUM_VALS]; + uint64_t dne; + void *p; + struct htable_iter iter; + + plan_tests(23); + for (i = 0; i < NUM_VALS; i++) + val[i] = i; + dne = i; + + ht = htable_new(hash, NULL); + ok1(ht->max < (1 << ht->bits)); + ok1(ht->bits == HTABLE_BASE_BITS); + + /* We cannot find an entry which doesn't exist. */ + ok1(!htable_get(ht, hash(&dne, NULL), objcmp, &dne)); + + /* Fill it, it should increase in size (once). */ + add_vals(ht, val, NUM_VALS); + ok1(ht->bits == HTABLE_BASE_BITS + 1); + ok1(ht->max < (1 << ht->bits)); + + /* Mask should be set. */ + ok1(ht->common_mask != 0); + ok1(ht->common_mask != -1); + ok1(check_mask(ht, val, NUM_VALS)); + + /* Find all. */ + find_vals(ht, val, NUM_VALS); + ok1(!htable_get(ht, hash(&dne, NULL), objcmp, &dne)); + + /* Walk once, should get them all. */ + i = 0; + for (p = htable_first(ht,&iter); p; p = htable_next(ht, &iter)) + i++; + ok1(i == NUM_VALS); + + /* Delete all. */ + del_vals(ht, val, NUM_VALS); + ok1(!htable_get(ht, hash(&val[0], NULL), objcmp, &val[0])); + + /* Worst case, a "pointer" which doesn't have any matching bits. */ + htable_add(ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); + htable_add(ht, hash(&val[NUM_VALS-1], NULL), &val[NUM_VALS-1]); + ok1(ht->common_mask == 0); + ok1(ht->common_bits == 0); + /* Get rid of bogus pointer before we trip over it! 
*/ + htable_del(ht, 0, (void *)~(uintptr_t)&val[NUM_VALS-1]); + + /* Add the rest. */ + add_vals(ht, val, NUM_VALS-1); + + /* Check we can find them all. */ + find_vals(ht, val, NUM_VALS); + ok1(!htable_get(ht, hash(&dne, NULL), objcmp, &dne)); + + htable_free(ht); + + /* Corner cases: wipe out the perfect bit using bogus pointer. */ + ht = htable_new(hash, NULL); + htable_add(ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1])); + ok1(ht->perfect_bit); + perfect_bit = ht->perfect_bit; + htable_add(ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1] + | perfect_bit)); + ok1(ht->perfect_bit == 0); + htable_del(ht, 0, (void *)((uintptr_t)&val[NUM_VALS-1] | perfect_bit)); + + /* Enlarging should restore it... */ + add_vals(ht, val, NUM_VALS-1); + + ok1(ht->perfect_bit != 0); + htable_free(ht); + + return exit_status(); +} diff --git a/lib/ccan/htable/tools/Makefile b/lib/ccan/htable/tools/Makefile new file mode 100644 index 0000000000..001e160b78 --- /dev/null +++ b/lib/ccan/htable/tools/Makefile @@ -0,0 +1,5 @@ +CFLAGS=-Wall -Werror -O3 -I../../.. + +speed: speed.o ../../hash.o + +speed.o: speed.c ../htable.h ../htable.c diff --git a/lib/ccan/htable/tools/speed.c b/lib/ccan/htable/tools/speed.c new file mode 100644 index 0000000000..26231924a1 --- /dev/null +++ b/lib/ccan/htable/tools/speed.c @@ -0,0 +1,377 @@ +/* Simple speed tests for hashtables. */ +#include <ccan/htable/htable_type.h> +#include <ccan/htable/htable.c> +#include <ccan/hash/hash.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <sys/time.h> + +static size_t hashcount; +struct object { + /* The key. */ + unsigned int key; + + /* Some contents. Doubles as consistency check. 
/*
 * Count the bits set in @val.
 *
 * Uses the compiler builtin when HAVE_BUILTIN_POPCOUNTL is set.  Otherwise
 * falls back to a parallel bit count: each step adds neighbouring fields of
 * 1, 2, 4, 8, 16 (and, for 64-bit long, 32) bits, so the shift distance has
 * to double on every step to match the field width selected by the mask.
 */
static unsigned int popcount(unsigned long val)
{
#if HAVE_BUILTIN_POPCOUNTL
	return __builtin_popcountl(val);
#else
	if (sizeof(long) == sizeof(uint64_t)) {
		uint64_t v = val;
		v = (v & 0x5555555555555555ULL)
			+ ((v >> 1) & 0x5555555555555555ULL);
		v = (v & 0x3333333333333333ULL)
			+ ((v >> 2) & 0x3333333333333333ULL);
		v = (v & 0x0F0F0F0F0F0F0F0FULL)
			+ ((v >> 4) & 0x0F0F0F0F0F0F0F0FULL);
		v = (v & 0x00FF00FF00FF00FFULL)
			+ ((v >> 8) & 0x00FF00FF00FF00FFULL);
		v = (v & 0x0000FFFF0000FFFFULL)
			+ ((v >> 16) & 0x0000FFFF0000FFFFULL);
		v = (v & 0x00000000FFFFFFFFULL)
			+ ((v >> 32) & 0x00000000FFFFFFFFULL);
		return (unsigned int)v;
	}
	/* 32-bit long: same reduction, one step fewer. */
	val = (val & 0x55555555ULL) + ((val >> 1) & 0x55555555ULL);
	val = (val & 0x33333333ULL) + ((val >> 2) & 0x33333333ULL);
	val = (val & 0x0F0F0F0FULL) + ((val >> 4) & 0x0F0F0F0FULL);
	val = (val & 0x00FF00FFULL) + ((val >> 8) & 0x00FF00FFULL);
	val = (val & 0x0000FFFFULL) + ((val >> 16) & 0x0000FFFFULL);
	return (unsigned int)val;
#endif
}
/*
 * Average cost of one operation, in nanoseconds.
 *
 * @start, @stop: timestamps taken before and after the timed loop.
 * @num: number of operations performed between the two timestamps.
 *
 * Returns 0 when @num is 0 or when @stop is not later than @start, so the
 * floating-point quotient is never NaN, infinite or negative at the point
 * where it is converted to size_t.
 */
static size_t normalize(const struct timeval *start,
			const struct timeval *stop,
			unsigned int num)
{
	long long usecs;

	/* Elapsed microseconds.  Subtracting field by field and summing in
	 * one 64-bit value needs no borrow handling between the fields. */
	usecs = (long long)(stop->tv_sec - start->tv_sec) * 1000000
		+ (stop->tv_usec - start->tv_usec);

	if (num == 0 || usecs <= 0)
		return 0;

	/* Floating point is more accurate here. */
	return (double)usecs / num * 1000;
}
*/ + update_common(htr, (void *)~htr->common_bits); + printf("Details: DUMB MODE: mask bits %u\n", + popcount(htr->common_mask)); + } + + printf("Initial lookup (match): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) + if (htable_obj_get(ht, &i)->self != objs[i].self) + abort(); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Initial lookup (miss): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) { + unsigned int n = i + num; + if (htable_obj_get(ht, &n)) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Lookups in order are very cache-friendly for judy; try random */ + printf("Initial lookup (random): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) + if (htable_obj_get(ht, &j)->self != &objs[j]) + abort(); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("Initial delete all: "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) + if (!htable_obj_del(ht, objs[i].self)) + abort(); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + printf("Details: rehashes %zu\n", hashcount); + + printf("Initial re-inserting: "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) + htable_obj_add(ht, objs[i].self); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("Deleting first half: "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i+=2) + if (!htable_obj_del(ht, objs[i].self)) + abort(); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Details: rehashes %zu, delete markers %zu\n", + hashcount, count_deleted(htr)); + + printf("Adding (a different) half: "); + fflush(stdout); + + for 
(i = 0; i < num; i+=2) + objs[i].key = num+i; + + gettimeofday(&start, NULL); + for (i = 0; i < num; i+=2) + htable_obj_add(ht, objs[i].self); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Details: delete markers %zu, perfect %.0f%%\n", + count_deleted(htr), perfect(htr) * 100.0 / htr->elems); + + printf("Lookup after half-change (match): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 1; i < num; i+=2) + if (htable_obj_get(ht, &i)->self != objs[i].self) + abort(); + for (i = 0; i < num; i+=2) { + unsigned int n = i + num; + if (htable_obj_get(ht, &n)->self != objs[i].self) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Lookup after half-change (miss): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) { + unsigned int n = i + num * 2; + if (htable_obj_get(ht, &n)) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + /* Hashtables with delete markers can fill with markers over time. + * so do some changes to see how it operates in long-term. */ + for (i = 0; i < 5; i++) { + if (i == 0) { + /* We don't measure this: jmap is different. */ + printf("Details: initial churn\n"); + } else { + printf("Churning %s time: ", + i == 1 ? "second" + : i == 2 ? "third" + : i == 3 ? "fourth" + : "fifth"); + fflush(stdout); + } + gettimeofday(&start, NULL); + for (j = 0; j < num; j++) { + if (!htable_obj_del(ht, &objs[j])) + abort(); + objs[j].key = num*i+j; + if (!htable_obj_add(ht, &objs[j])) + abort(); + } + gettimeofday(&stop, NULL); + if (i != 0) + printf(" %zu ns\n", normalize(&start, &stop, num)); + } + + /* Spread out the keys more to try to make it harder. 
*/ + printf("Details: reinserting with spread\n"); + for (i = 0; i < num; i++) { + if (!htable_obj_del(ht, objs[i].self)) + abort(); + objs[i].key = num * 5 + i * 9; + if (!htable_obj_add(ht, objs[i].self)) + abort(); + } + printf("Details: delete markers %zu, perfect %.0f%%\n", + count_deleted(htr), perfect(htr) * 100.0 / htr->elems); + i = worst_run(htr, &deleted); + printf("Details: worst run %zu (%zu deleted)\n", i, deleted); + + printf("Lookup after churn & spread (match): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) { + unsigned int n = num * 5 + i * 9; + if (htable_obj_get(ht, &n)->self != objs[i].self) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Lookup after churn & spread (miss): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i++) { + unsigned int n = num * (5 + 9) + i * 9; + if (htable_obj_get(ht, &n)) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Lookup after churn & spread (random): "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0, j = 0; i < num; i++, j = (j + 10007) % num) { + unsigned int n = num * 5 + j * 9; + if (htable_obj_get(ht, &n)->self != &objs[j]) + abort(); + } + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + hashcount = 0; + printf("Deleting half after churn & spread: "); + fflush(stdout); + gettimeofday(&start, NULL); + for (i = 0; i < num; i+=2) + if (!htable_obj_del(ht, objs[i].self)) + abort(); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + printf("Adding (a different) half after churn & spread: "); + fflush(stdout); + + for (i = 0; i < num; i+=2) + objs[i].key = num*6+i*9; + + gettimeofday(&start, NULL); + for (i = 0; i < num; i+=2) + htable_obj_add(ht, objs[i].self); + gettimeofday(&stop, NULL); + printf(" %zu ns\n", normalize(&start, &stop, num)); + + 
printf("Details: delete markers %zu, perfect %.0f%%\n", + count_deleted(htr), perfect(htr) * 100.0 / htr->elems); + + return 0; +} diff --git a/lib/ccan/ilog/LICENSE b/lib/ccan/ilog/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/ilog/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. 
+ + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. 
We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. 
+ + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. 
+ + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. 
+ + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. 
+ + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. 
If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. 
+ + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. 
However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/lib/ccan/ilog/_info b/lib/ccan/ilog/_info new file mode 100644 index 0000000000..56de50d610 --- /dev/null +++ b/lib/ccan/ilog/_info @@ -0,0 +1,47 @@ +/** + * ilog - Integer logarithm. + * + * ilog_32() and ilog_64() compute the minimum number of bits required to store + * an unsigned 32-bit or 64-bit value without any leading zero bits. + * This can also be thought of as the location of the highest set bit, with + * counting starting from one (so that 0 returns 0, 1 returns 1, and 2**31 + * returns 32). + * When the value is known to be non-zero ilog32_nz() and ilog64_nz() can + * compile into as few as two instructions, one of which may get optimized out + * later. + * STATIC_ILOG_32 and STATIC_ILOG_64 allow computation on compile-time + * constants, so other compile-time constants can be derived from them. + * + * Example: + * #include <stdio.h> + * #include <limits.h> + * #include <ccan/ilog/ilog.h> + * + * int main(void){ + * int i; + * printf("ilog32(0x%08X)=%i\n",0,ilog32(0)); + * for(i=1;i<=STATIC_ILOG_32(USHRT_MAX);i++){ + * uint32_t v; + * v=(uint32_t)1U<<(i-1); + * //Here we know v is non-zero, so we can use ilog32_nz(). 
+ * printf("ilog32(0x%08X)=%i\n",v,ilog32_nz(v)); + * } + * return 0; + * } + * + * License: LGPL (v2 or later) + * Author: Timothy B. Terriberry <tterribe@xiph.org> + */ +#include <string.h> +#include <stdio.h> +#include "config.h" + +int main(int _argc,const char *_argv[]){ + /*Expect exactly one argument.*/ + if(_argc!=2)return 1; + if(strcmp(_argv[1],"depends")==0){ + printf("ccan/compiler\n"); + return 0; + } + return 1; +} diff --git a/lib/ccan/ilog/ilog.c b/lib/ccan/ilog/ilog.c new file mode 100644 index 0000000000..40c5a6fd50 --- /dev/null +++ b/lib/ccan/ilog/ilog.c @@ -0,0 +1,139 @@ +/*(C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 LGPL (v2 or later).*/ +#include "ilog.h" +#include <limits.h> + +/*The fastest fallback strategy for platforms with fast multiplication appears + to be based on de Bruijn sequences~\cite{LP98}. + Tests confirmed this to be true even on an ARM11, where it is actually faster + than using the native clz instruction. + Define ILOG_NODEBRUIJN to use a simpler fallback on platforms where + multiplication or table lookups are too expensive. + + @UNPUBLISHED{LP98, + author="Charles E. Leiserson and Harald Prokop", + title="Using de {Bruijn} Sequences to Index a 1 in a Computer Word", + month=Jun, + year=1998, + note="\url{http://supertech.csail.mit.edu/papers/debruijn.pdf}" + }*/ +static UNNEEDED const unsigned char DEBRUIJN_IDX32[32]={ + 0, 1,28, 2,29,14,24, 3,30,22,20,15,25,17, 4, 8, + 31,27,13,23,21,19,16, 7,26,12,18, 6,11, 5,10, 9 +}; + +/* We always compile these in, in case someone takes address of function. 
*/ +#undef ilog32_nz +#undef ilog32 +#undef ilog64_nz +#undef ilog64 + +int ilog32(uint32_t _v){ +/*On a Pentium M, this branchless version tested as the fastest version without + multiplications on 1,000,000,000 random 32-bit integers, edging out a + similar version with branches, and a 256-entry LUT version.*/ +# if defined(ILOG_NODEBRUIJN) + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFU)<<4; + _v>>=m; + ret|=m; + m=(_v>0xFFU)<<3; + _v>>=m; + ret|=m; + m=(_v>0xFU)<<2; + _v>>=m; + ret|=m; + m=(_v>3)<<1; + _v>>=m; + ret|=m; + ret+=_v>1; + return ret; +/*This de Bruijn sequence version is faster if you have a fast multiplier.*/ +# else + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v=(_v>>1)+1; + ret+=DEBRUIJN_IDX32[_v*0x77CB531U>>27&0x1F]; + return ret; +# endif +} + +int ilog32_nz(uint32_t _v) +{ + return ilog32(_v); +} + +int ilog64(uint64_t _v){ +# if defined(ILOG_NODEBRUIJN) + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + m=(v>0xFFFFU)<<4; + v>>=m; + ret|=m; + m=(v>0xFFU)<<3; + v>>=m; + ret|=m; + m=(v>0xFU)<<2; + v>>=m; + ret|=m; + m=(v>3)<<1; + v>>=m; + ret|=m; + ret+=v>1; + return ret; +# else +/*If we don't have a 64-bit word, split it into two 32-bit halves.*/ +# if LONG_MAX<9223372036854775807LL + uint32_t v; + int ret; + int m; + ret=_v>0; + m=(_v>0xFFFFFFFFU)<<5; + v=(uint32_t)(_v>>m); + ret|=m; + v|=v>>1; + v|=v>>2; + v|=v>>4; + v|=v>>8; + v|=v>>16; + v=(v>>1)+1; + ret+=DEBRUIJN_IDX32[v*0x77CB531U>>27&0x1F]; + return ret; +/*Otherwise do it in one 64-bit operation.*/ +# else + static const unsigned char DEBRUIJN_IDX64[64]={ + 0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40, + 5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57, + 63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56, + 62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58 + }; + int ret; + ret=_v>0; + _v|=_v>>1; + _v|=_v>>2; + _v|=_v>>4; + _v|=_v>>8; + _v|=_v>>16; + _v|=_v>>32; + _v=(_v>>1)+1; + 
ret+=DEBRUIJN_IDX64[_v*0x218A392CD3D5DBF>>58&0x3F]; + return ret; +# endif +# endif +} + +int ilog64_nz(uint64_t _v) +{ + return ilog64(_v); +} diff --git a/lib/ccan/ilog/ilog.h b/lib/ccan/ilog/ilog.h new file mode 100644 index 0000000000..55dd009885 --- /dev/null +++ b/lib/ccan/ilog/ilog.h @@ -0,0 +1,150 @@ +#if !defined(_ilog_H) +# define _ilog_H (1) +# include "config.h" +# include <stdint.h> +# include <limits.h> +# include <ccan/compiler/compiler.h> + +/** + * ilog32 - Integer binary logarithm of a 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * + * See Also: + * ilog32_nz(), ilog64() + * + * Example: + * // Rounds up to next power of 2 (if not a power of 2). + * static uint32_t round_up32(uint32_t i) + * { + * assert(i != 0); + * return 1U << ilog32(i-1); + * } + */ +int ilog32(uint32_t _v) IDEMPOTENT; + +/** + * ilog32_nz - Integer binary logarithm of a non-zero 32-bit value. + * @_v: A 32-bit value. + * Returns floor(log2(_v))+1, or undefined if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog32(), ilog64_nz() + * Example: + * // Find Last Set (ie. highest bit set, 0 to 31). + * static uint32_t fls32(uint32_t i) + * { + * assert(i != 0); + * return ilog32_nz(i) - 1; + * } + */ +int ilog32_nz(uint32_t _v) IDEMPOTENT; + +/** + * ilog64 - Integer binary logarithm of a 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. 
+ * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog64_nz(), ilog32() + */ +int ilog64(uint64_t _v) IDEMPOTENT; + +/** + * ilog64_nz - Integer binary logarithm of a non-zero 64-bit value. + * @_v: A 64-bit value. + * Returns floor(log2(_v))+1, or undefined if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * Note that many uses will resolve to the fast macro version instead. + * See Also: + * ilog64(), ilog32_nz() + */ +int ilog64_nz(uint64_t _v) IDEMPOTENT; + +/** + * STATIC_ILOG_32 - The integer logarithm of an (unsigned, 32-bit) constant. + * @_v: A non-negative 32-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro should only be used when you need a compile-time constant, + * otherwise ilog32 or ilog32_nz are just as fast and more flexible. + * + * Example: + * #define MY_PAGE_SIZE 4096 + * #define MY_PAGE_BITS (STATIC_ILOG_32(MY_PAGE_SIZE) - 1) + */ +#define STATIC_ILOG_32(_v) (STATIC_ILOG5((uint32_t)(_v))) + +/** + * STATIC_ILOG_64 - The integer logarithm of an (unsigned, 64-bit) constant. + * @_v: A non-negative 64-bit constant. + * Returns floor(log2(_v))+1, or 0 if _v==0. + * This is the number of bits that would be required to represent _v in two's + * complement notation with all of the leading zeros stripped. + * This macro should only be used when you need a compile-time constant, + * otherwise ilog64 or ilog64_nz are just as fast and more flexible.
+ */ +#define STATIC_ILOG_64(_v) (STATIC_ILOG6((uint64_t)(_v))) + +/* Private implementation details */ + +/*Note the casts to (int) below: this prevents "upgrading" + the type of an entire expression to an (unsigned) size_t. The sizeof operand must match the type the builtin counts zeros in: unsigned for clz, unsigned long for clzl, unsigned long long for clzll.*/ +#if INT_MAX>=2147483647 && HAVE_BUILTIN_CLZ +#define builtin_ilog32_nz(v) \ + (((int)sizeof(unsigned)*CHAR_BIT) - __builtin_clz(v)) +#elif LONG_MAX>=2147483647L && HAVE_BUILTIN_CLZL +#define builtin_ilog32_nz(v) \ + (((int)sizeof(unsigned long)*CHAR_BIT) - __builtin_clzl(v)) +#endif + +#if INT_MAX>=9223372036854775807LL && HAVE_BUILTIN_CLZ +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned)*CHAR_BIT) - __builtin_clz(v)) +#elif LONG_MAX>=9223372036854775807LL && HAVE_BUILTIN_CLZL +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned long)*CHAR_BIT) - __builtin_clzl(v)) +#elif HAVE_BUILTIN_CLZLL +#define builtin_ilog64_nz(v) \ + (((int)sizeof(unsigned long long)*CHAR_BIT) - __builtin_clzll(v)) +#endif + +#ifdef builtin_ilog32_nz +#define ilog32(_v) (builtin_ilog32_nz(_v)&-!!(_v)) +#define ilog32_nz(_v) builtin_ilog32_nz(_v) +#else +#define ilog32_nz(_v) ilog32(_v) +#define ilog32(_v) (IS_COMPILE_CONSTANT(_v) ? STATIC_ILOG_32(_v) : ilog32(_v)) +#endif /* builtin_ilog32_nz */ + +#ifdef builtin_ilog64_nz +#define ilog64(_v) (builtin_ilog64_nz(_v)&-!!(_v)) +#define ilog64_nz(_v) builtin_ilog64_nz(_v) +#else +#define ilog64_nz(_v) ilog64(_v) +#define ilog64(_v) (IS_COMPILE_CONSTANT(_v) ? STATIC_ILOG_64(_v) : ilog64(_v)) +#endif /* builtin_ilog64_nz */ + +/* Macros for evaluating compile-time constant ilog.
*/ +# define STATIC_ILOG0(_v) (!!(_v)) +# define STATIC_ILOG1(_v) (((_v)&0x2)?2:STATIC_ILOG0(_v)) +# define STATIC_ILOG2(_v) (((_v)&0xC)?2+STATIC_ILOG1((_v)>>2):STATIC_ILOG1(_v)) +# define STATIC_ILOG3(_v) \ + (((_v)&0xF0)?4+STATIC_ILOG2((_v)>>4):STATIC_ILOG2(_v)) +# define STATIC_ILOG4(_v) \ + (((_v)&0xFF00)?8+STATIC_ILOG3((_v)>>8):STATIC_ILOG3(_v)) +# define STATIC_ILOG5(_v) \ + (((_v)&0xFFFF0000)?16+STATIC_ILOG4((_v)>>16):STATIC_ILOG4(_v)) +# define STATIC_ILOG6(_v) \ + (((_v)&0xFFFFFFFF00000000ULL)?32+STATIC_ILOG5((_v)>>32):STATIC_ILOG5(_v)) + +#endif /* _ilog_H */ diff --git a/lib/ccan/ilog/test/run-out-of-line.c b/lib/ccan/ilog/test/run-out-of-line.c new file mode 100644 index 0000000000..48205d380e --- /dev/null +++ b/lib/ccan/ilog/test/run-out-of-line.c @@ -0,0 +1,65 @@ +#include <ccan/ilog/ilog.h> +#include <ccan/ilog/ilog.c> +#include <stdio.h> +#include <ccan/tap/tap.h> + +/*Dead simple (but slow) versions to compare against.*/ + +static int test_ilog32(uint32_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +static int test_ilog64(uint64_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +#define NTRIALS (64) + +int main(int _argc,const char *_argv[]){ + int i; + int j; + int (*il32)(uint32_t) = ilog32; + int (*il64)(uint64_t) = ilog64; + int (*il32_nz)(uint32_t) = ilog32_nz; + int (*il64_nz)(uint64_t) = ilog64_nz; + + /*This is how many tests you plan to run.*/ + plan_tests(33 * NTRIALS * 3 + 65 * NTRIALS * 3); + for(i=0;i<=32;i++){ + uint32_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint32_t)1U<<(i-1):0; + for(j=0;j<NTRIALS;j++){ + int l; + l=test_ilog32(v); + ok1(STATIC_ILOG_32(v)==l); + ok1(il32(v)==l); + ok1(il32_nz(v) == l || v == 0); + /*Also try a few more pseudo-random values with at most the same number + of bits.*/ + v=(1103515245U*v+12345U)&0xFFFFFFFFU>>((33-i)>>1)>>((32-i)>>1); + } + } + + for(i=0;i<=64;i++){ + uint64_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint64_t)1U<<(i-1):0; + 
for(j=0;j<NTRIALS;j++){ + int l; + l=test_ilog64(v); + ok1(STATIC_ILOG_64(v)==l); + ok1(il64(v)==l); + ok1(il64_nz(v) == l || v == 0); + /*Also try a few more pseudo-random values with at most the same number + of bits.*/ + v=(uint64_t)((2862933555777941757ULL*v+3037000493ULL) + &0xFFFFFFFFFFFFFFFFULL>>((65-i)>>1)>>((64-i)>>1)); + } + } + return exit_status(); +} diff --git a/lib/ccan/ilog/test/run.c b/lib/ccan/ilog/test/run.c new file mode 100644 index 0000000000..bda59f920a --- /dev/null +++ b/lib/ccan/ilog/test/run.c @@ -0,0 +1,60 @@ +#include <ccan/ilog/ilog.h> +#include <ccan/ilog/ilog.c> +#include <stdio.h> +#include <ccan/tap/tap.h> + +/*Dead simple (but slow) versions to compare against.*/ + +static int test_ilog32(uint32_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +static int test_ilog64(uint64_t _v){ + int ret; + for(ret=0;_v;ret++)_v>>=1; + return ret; +} + +#define NTRIALS (64) + +int main(int _argc,const char *_argv[]){ + int i; + int j; + /*This is how many tests you plan to run.*/ + plan_tests(33 * NTRIALS * 3 + 65 * NTRIALS * 3); + for(i=0;i<=32;i++){ + uint32_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint32_t)1U<<(i-1):0; + for(j=0;j<NTRIALS;j++){ + int l; + l=test_ilog32(v); + ok1(STATIC_ILOG_32(v)==l); + ok1(ilog32(v)==l); + ok1(ilog32_nz(v) == l || v == 0); + /*Also try a few more pseudo-random values with at most the same number + of bits.*/ + v=(1103515245U*v+12345U)&0xFFFFFFFFU>>((33-i)>>1)>>((32-i)>>1); + } + } + + for(i=0;i<=64;i++){ + uint64_t v; + /*Test each bit in turn (and 0).*/ + v=i?(uint64_t)1U<<(i-1):0; + for(j=0;j<NTRIALS;j++){ + int l; + l=test_ilog64(v); + ok1(STATIC_ILOG_64(v)==l); + ok1(ilog64(v)==l); + ok1(ilog64_nz(v) == l || v == 0); + /*Also try a few more pseudo-random values with at most the same number + of bits.*/ + v=(uint64_t)((2862933555777941757ULL*v+3037000493ULL) + &0xFFFFFFFFFFFFFFFFULL>>((65-i)>>1)>>((64-i)>>1)); + } + } + return exit_status(); +} diff --git a/lib/ccan/libccan.m4 
b/lib/ccan/libccan.m4 new file mode 100644 index 0000000000..92676c3184 --- /dev/null +++ b/lib/ccan/libccan.m4 @@ -0,0 +1,315 @@ +dnl find the ccan sources. +ccandir="../lib/ccan" +for d in $ccanpaths; do + if test -f "$srcdir/$d/str/str.c"; then + ccandir="$d" + AC_SUBST(ccandir) + break + fi +done +if test -f "$ccandir/str/str.c"; then :; else + AC_MSG_ERROR([cannot find ccan source in $ccandir]) +fi +CCAN_OBJ="$ccandir/hash/hash.o $ccandir/htable/htable.o $ccandir/ilog/ilog.o $ccandir/likely/likely.o $ccandir/str/debug.o $ccandir/str/str.o $ccandir/tally/tally.o" + +AC_SUBST(CCAN_OBJ) + +# Preferred method for including ccan modules is #include <ccan/module/...>. +CCAN_CFLAGS="-I$ccandir/.." +AC_SUBST(CCAN_CFLAGS) + +# All the configuration checks. Regrettably, the __attribute__ checks will +# give false positives on old GCCs, since they just cause warnings. But that's +# fairly harmless. +AC_CACHE_CHECK([whether we can compile with __attribute__((cold))], + samba_cv_attribute_cold, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((cold)) + cleanup(void) { } + ], + samba_cv_attribute_cold=yes) + ]) + +if test x"$samba_cv_attribute_cold" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_COLD, 1, + [whether we can compile with __attribute__((cold))]) +fi + +AC_CACHE_CHECK([whether we can compile with __attribute__((const))], + samba_cv_attribute_const, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((const)) + cleanup(void) { } + ], + samba_cv_attribute_const=yes) + ]) + +if test x"$samba_cv_attribute_const" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_CONST, 1, + [whether we can compile with __attribute__((const))]) +fi + +AC_CACHE_CHECK([whether we can compile with __attribute__((noreturn))], + samba_cv_attribute_noreturn, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((noreturn)) + cleanup(void) { exit(1); } + ], + samba_cv_attribute_noreturn=yes) + ]) + +if test x"$samba_cv_attribute_noreturn" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_NORETURN, 
1, + [whether we can compile with __attribute__((noreturn))]) +fi + +AC_CACHE_CHECK([whether we can compile with __attribute__((printf))], + samba_cv_attribute_printf, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((format(__printf__, 1, 2))) + cleanup(const char *fmt, ...) { } + ], + samba_cv_attribute_printf=yes) + ]) + +if test x"$samba_cv_attribute_printf" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_PRINTF, 1, + [whether we can compile with __attribute__((format(printf)))]) +fi + +AC_CACHE_CHECK([whether we can compile with __attribute__((unused))], + samba_cv_attribute_unused, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((unused)) + cleanup(void) { } + ], + samba_cv_attribute_unused=yes) + ]) + +if test x"$samba_cv_attribute_unused" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_UNUSED, 1, + [whether we can compile with __attribute__((unused))]) +fi + +AC_CACHE_CHECK([whether we can compile with __attribute__((used))], + samba_cv_attribute_used, + [ + AC_COMPILE_IFELSE( + [ + static void __attribute__((used)) + cleanup(void) { } + ], + samba_cv_attribute_used=yes) + ]) + +if test x"$samba_cv_attribute_used" = xyes ; then + AC_DEFINE(HAVE_ATTRIBUTE_USED, 1, + [whether we can compile with __attribute__((used))]) +fi + +# FIXME: We could use endian.h or sys/endian.h here, and __BYTE_ORDER for +# cross-compiling. +AC_CACHE_CHECK([whether we are big endian],samba_cv_big_endian,[ +AC_TRY_RUN([int main(void) { +union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x01 && u.c[1] == 0x02 && u.c[2] == 0x03 && u.c[3] == 0x04 ? 0 : 1; +}], +samba_cv_big_endian=yes, +samba_cv_big_endian=no)]) +if test x"$samba_cv_big_endian" = xyes ; then + AC_DEFINE(HAVE_BIG_ENDIAN, 1, + [whether we are big endian]) +fi + +AC_CACHE_CHECK([whether we have __builtin_clz], + samba_cv_builtin_clz, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_clz(1) == (sizeof(int)*8 - 1) ? 
0 : 1; + }], + samba_cv_builtin_clz=yes) + ]) + +if test x"$samba_cv_builtin_clz" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_CLZ, 1, + [whether we have __builtin_clz]) +fi + +AC_CACHE_CHECK([whether we have __builtin_clzl], + samba_cv_builtin_clzl, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_clzl(1) == (sizeof(long)*8 - 1) ? 0 : 1; + }], + samba_cv_builtin_clzl=yes) + ]) + +if test x"$samba_cv_builtin_clzl" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_CLZL, 1, + [whether we have __builtin_clzl]) +fi +AC_CACHE_CHECK([whether we have __builtin_clzll], + samba_cv_builtin_clzll, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_clzll(1) == (sizeof(long long)*8 - 1) ? 0 : 1; + }], + samba_cv_builtin_clzll=yes) + ]) + +if test x"$samba_cv_builtin_clzll" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_CLZLL, 1, + [whether we have __builtin_clzll]) +fi + +AC_CACHE_CHECK([whether we have __builtin_constant_p], + samba_cv_builtin_constant_p, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_constant_p(1) ? 0 : 1; + }], + samba_cv_builtin_constant_p=yes) + ]) + +if test x"$samba_cv_builtin_constant_p" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_CONSTANT_P, 1, + [whether we have __builtin_constant_p]) +fi + +AC_CACHE_CHECK([whether we have __builtin_expect], + samba_cv_builtin_expect, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_expect(main != 0, 1) ? 0 : 1; /* needs both arguments */ + }], + samba_cv_builtin_expect=yes) + ]) + +if test x"$samba_cv_builtin_expect" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_EXPECT, 1, + [whether we have __builtin_expect]) +fi + +AC_CACHE_CHECK([whether we have __builtin_popcountl], + samba_cv_builtin_popcountl, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_popcountl(255L) == 8 ?
0 : 1; + }], + samba_cv_builtin_popcountl=yes) + ]) + +if test x"$samba_cv_builtin_popcountl" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_POPCOUNTL, 1, + [whether we have __builtin_popcountl]) +fi + +AC_CACHE_CHECK([whether we have __builtin_types_compatible_p], + samba_cv_builtin_types_compatible_p, + [ + AC_COMPILE_IFELSE( + [int main(void) { + return __builtin_types_compatible_p(char *, int) ? 1 : 0; + }], + samba_cv_builtin_types_compatible_p=yes) + ]) + +if test x"$samba_cv_builtin_types_compatible_p" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_TYPES_COMPATIBLE_P, 1, + [whether we have __builtin_types_compatible_p]) +fi + +AC_CACHE_CHECK([whether we have __builtin_compound_literals], + samba_cv_builtin_compound_literals, + [ + AC_COMPILE_IFELSE( + [int main(void) { + int *foo = (int[]) { 1, 2, 3, 4 }; + return foo[0] == 1 ? 0 : 1; + }], + samba_cv_builtin_compound_literals=yes) + ]) + +if test x"$samba_cv_builtin_compound_literals" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_COMPOUND_LITERALS, 1, + [whether we have __builtin_compound_literals]) +fi + +AC_CACHE_CHECK([whether we have __builtin_have_isblank], + samba_cv_builtin_have_isblank, + [ + AC_COMPILE_IFELSE( + [#include <ctype.h> + int main(void) { return isblank(' ') ? 0 : 1; } + ], + samba_cv_builtin_have_isblank=yes) + ]) + +if test x"$samba_cv_builtin_have_isblank" = xyes ; then + AC_DEFINE(HAVE_BUILTIN_HAVE_ISBLANK, 1, + [whether we have __builtin_have_isblank]) +fi + +# FIXME: We could use endian.h or sys/endian.h here, and __BYTE_ORDER for +# cross-compiling. +AC_CACHE_CHECK([whether we are little endian],samba_cv_little_endian,[ +AC_TRY_RUN([int main(void) { +union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x04 && u.c[1] == 0x03 && u.c[2] == 0x02 && u.c[3] == 0x01 ? 
0 : 1; +}], +samba_cv_little_endian=yes, +samba_cv_little_endian=no)]) +if test x"$samba_cv_little_endian" = xyes ; then + AC_DEFINE(HAVE_LITTLE_ENDIAN, 1, + [whether we are little endian]) +fi + +AC_CACHE_CHECK([whether we have __typeof__], + samba_cv_typeof, + [ + AC_COMPILE_IFELSE( + [int main(void) { + int x = 1; + __typeof__(x) i; + i = x; + return i == x ? 0 : 1; + }], + samba_cv_typeof=yes) + ]) + +if test x"$samba_cv_typeof" = xyes ; then + AC_DEFINE(HAVE_TYPEOF, 1, + [whether we have __typeof__]) +fi + +AC_CACHE_CHECK([whether we have __attribute__((warn_unused_result))], + samba_cv_warn_unused_result, + [ + AC_COMPILE_IFELSE( + [int __attribute__((warn_unused_result)) func(int x) + { return x; }], + samba_cv_warn_unused_result=yes) + ]) + +if test x"$samba_cv_warn_unused_result" = xyes ; then + AC_DEFINE(HAVE_WARN_UNUSED_RESULT, 1, + [whether we have __attribute__((warn_unused_result))]) +fi diff --git a/lib/ccan/likely/LICENSE b/lib/ccan/likely/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/likely/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
diff --git a/lib/ccan/likely/_info b/lib/ccan/likely/_info
new file mode 100644
index 0000000000..471c1ffc78
--- /dev/null
+++ b/lib/ccan/likely/_info
@@ -0,0 +1,45 @@
+#include <string.h>
+#include <stdio.h>
+#include "config.h"
+
+/**
+ * likely - macros for annotating likely/unlikely branches in the code
+ *
+ * Inspired by Andi Kleen's macros for the Linux Kernel, these macros
+ * help you annotate rare paths in your code for the convenience of the
+ * compiler and the reader.
+ *
+ * License: LGPL (2 or any later version)
+ * Author: Rusty Russell <rusty@rustcorp.com.au>
+ *
+ * Example:
+ *	#include <ccan/likely/likely.h>
+ *	#include <stdio.h>
+ *
+ *	int main(int argc, char *argv[])
+ *	{
+ *		// This example is silly: the compiler knows exit() is unlikely.
+ *		if (unlikely(argc == 1)) {
+ *			fprintf(stderr, "Usage: %s <args>...\n", argv[0]);
+ *			return 1;
+ *		}
+ *		for (argc = 1; argv[argc]; argc++)
+ *			printf("%s\n", argv[argc]);
+ *		return 0;
+ *	}
+ */
+int main(int argc, char *argv[])
+{
+	/* Expect exactly one argument */
+	if (argc != 2)
+		return 1;
+
+	if (strcmp(argv[1], "depends") == 0) {
+		printf("ccan/str\n");
+		printf("ccan/htable\n");
+		printf("ccan/hash\n");
+		return 0;
+	}
+
+	return 1;
+}
diff --git a/lib/ccan/likely/likely.c b/lib/ccan/likely/likely.c
new file mode 100644
index 0000000000..8893d0b6d2
--- /dev/null
+++ b/lib/ccan/likely/likely.c
@@ -0,0 +1,184 @@
+#ifdef CCAN_LIKELY_DEBUG
+#include <ccan/likely/likely.h>
+#include <ccan/hash/hash.h>
+#include <ccan/htable/htable.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Output format of likely_stats(); also used to size its buffer. */
+#define LIKELY_STATS_FMT "%s:%u:%slikely(%s) correct %u%% (%lu/%lu)"
+
+/* Table of struct trace, created on the first _likely_trace() call. */
+static struct htable *htable;
+
+/* Counters for one likely()/unlikely() call site. */
+struct trace {
+	const char *condstr;
+	const char *file;
+	unsigned int line;
+	bool expect;
+	unsigned long count, right;
+};
+
+/* We hash the pointers, which will be identical for same call. */
+static unsigned long hash_trace(const struct trace *trace)
+{
+	return hash_pointer(trace->condstr,
+			    hash_pointer(trace->file,
+					 trace->line + trace->expect));
+}
+
+/* htable_get() callback: true if both traces name the same call site. */
+static bool hash_cmp(const void *htelem, void *cmpdata)
+{
+	const struct trace *t1 = htelem, *t2 = cmpdata;
+	return t1->condstr == t2->condstr
+		&& t1->file == t2->file
+		&& t1->line == t2->line
+		&& t1->expect == t2->expect;
+}
+
+/* htable rehash callback: recompute the hash of a stored trace. */
+static size_t rehash(const void *elem, void *priv)
+{
+	return hash_trace(elem);
+}
+
+/* Set the identifying fields of @trace and zero its counters. */
+static void init_trace(struct trace *trace,
+		       const char *condstr, const char *file, unsigned int line,
+		       bool expect)
+{
+	trace->condstr = condstr;
+	trace->file = file;
+	trace->line = line;
+	trace->expect = expect;
+	trace->count = trace->right = 0;
+}
+
+/* Allocate a fresh trace and insert it; returns NULL if that fails. */
+static struct trace *add_trace(const char *condstr,
+			       const char *file, unsigned int line, bool expect)
+{
+	struct trace *trace = malloc(sizeof(*trace));
+
+	if (!trace)
+		return NULL;
+
+	init_trace(trace, condstr, file, line, expect);
+	if (!htable_add(htable, hash_trace(trace), trace)) {
+		free(trace);
+		return NULL;
+	}
+	return trace;
+}
+
+/*
+ * Record one evaluation of a likely()/unlikely() condition and return
+ * @cond unchanged.  If the bookkeeping cannot be allocated the sample
+ * is dropped rather than crashing the traced program.
+ */
+long _likely_trace(bool cond, bool expect,
+		   const char *condstr,
+		   const char *file, unsigned int line)
+{
+	struct trace *p, trace;
+
+	if (!htable) {
+		htable = htable_new(rehash, NULL);
+		if (!htable)
+			return cond;
+	}
+
+	init_trace(&trace, condstr, file, line, expect);
+	p = htable_get(htable, hash_trace(&trace), hash_cmp, &trace);
+	if (!p) {
+		p = add_trace(condstr, file, line, expect);
+		if (!p)
+			return cond;
+	}
+
+	p->count++;
+	if (cond == expect)
+		p->right++;
+
+	return cond;
+}
+
+/* Running state while likely_stats() scans for the worst trace. */
+struct get_stats_info {
+	struct trace *worst;
+	unsigned int min_hits;
+	double worst_ratio;
+};
+
+/* Fraction of evaluations which matched the expectation. */
+static double right_ratio(const struct trace *t)
+{
+	return (double)t->right / t->count;
+}
+
+/* Track @trace in @info if it has enough hits and the worst ratio so far. */
+static void get_stats(struct trace *trace, struct get_stats_info *info)
+{
+	if (trace->count < info->min_hits)
+		return;
+
+	if (right_ratio(trace) < info->worst_ratio) {
+		info->worst = trace;
+		info->worst_ratio = right_ratio(trace);
+	}
+}
+
+/* See likely.h for the contract. */
+const char *likely_stats(unsigned int min_hits, unsigned int percent)
+{
+	struct get_stats_info info;
+	struct htable_iter i;
+	char *ret;
+	size_t len;
+	struct trace *trace;
+
+	if (!htable)
+		return NULL;
+
+	info.min_hits = min_hits;
+	info.worst = NULL;
+	info.worst_ratio = 2;
+
+	/* This is O(n), but it's not likely called that often. */
+	for (trace = htable_first(htable, &i);
+	     trace;
+	     trace = htable_next(htable,&i)) {
+		get_stats(trace, &info);
+	}
+
+	/* No candidate at all: worst is only set when one is found. */
+	if (!info.worst)
+		return NULL;
+
+	if (info.worst_ratio * 100 > percent)
+		return NULL;
+
+	len = strlen(info.worst->condstr)
+		+ strlen(info.worst->file)
+		+ sizeof(long int) * 8
+		+ sizeof(LIKELY_STATS_FMT);
+	ret = malloc(len);
+	/* Leave the trace in the table so a later call can still report it. */
+	if (!ret)
+		return NULL;
+
+	snprintf(ret, len, LIKELY_STATS_FMT,
+		 info.worst->file, info.worst->line,
+		 info.worst->expect ? "" : "un", info.worst->condstr,
+		 (unsigned)(info.worst_ratio * 100),
+		 info.worst->right, info.worst->count);
+
+	htable_del(htable, hash_trace(info.worst), info.worst);
+	free(info.worst);
+
+	return ret;
+}
+#endif /*CCAN_LIKELY_DEBUG*/
diff --git a/lib/ccan/likely/likely.h b/lib/ccan/likely/likely.h
new file mode 100644
index 0000000000..80d695c842
--- /dev/null
+++ b/lib/ccan/likely/likely.h
@@ -0,0 +1,105 @@
+#ifndef CCAN_LIKELY_H
+#define CCAN_LIKELY_H
+#include "config.h"
+#include <ccan/str/str.h>
+#include <stdbool.h>
+
+#ifndef CCAN_LIKELY_DEBUG
+#if HAVE_BUILTIN_EXPECT
+/**
+ * likely - indicate that a condition is likely to be true.
+ * @cond: the condition
+ *
+ * This uses a compiler extension where available to indicate a likely
+ * code path and optimize appropriately; it's also useful for readers
+ * to quickly identify exceptional paths through functions. The
+ * threshold for "likely" is usually considered to be between 90 and
+ * 99%; marginal cases should not be marked either way.
+ *
+ * See Also:
+ *	unlikely(), likely_stats()
+ *
+ * Example:
+ *	// Returns false if we overflow.
+ * static inline bool inc_int(unsigned int *val) + * { + * (*val)++; + * if (likely(*val)) + * return true; + * return false; + * } + */ +#define likely(cond) __builtin_expect(!!(cond), 1) + +/** + * unlikely - indicate that a condition is unlikely to be true. + * @cond: the condition + * + * This uses a compiler extension where available to indicate an unlikely + * code path and optimize appropriately; see likely() above. + * + * See Also: + * likely(), likely_stats(), COLD (compiler.h) + * + * Example: + * // Prints a warning if we overflow. + * static inline void inc_int(unsigned int *val) + * { + * (*val)++; + * if (unlikely(*val == 0)) + * fprintf(stderr, "Overflow!"); + * } + */ +#define unlikely(cond) __builtin_expect(!!(cond), 0) +#else +#define likely(cond) (!!(cond)) +#define unlikely(cond) (!!(cond)) +#endif +#else /* CCAN_LIKELY_DEBUG versions */ +#define likely(cond) \ + (_likely_trace(!!(cond), 1, stringify(cond), __FILE__, __LINE__)) +#define unlikely(cond) \ + (_likely_trace(!!(cond), 0, stringify(cond), __FILE__, __LINE__)) + +long _likely_trace(bool cond, bool expect, + const char *condstr, + const char *file, unsigned int line); +#endif + +#ifdef CCAN_LIKELY_DEBUG +/** + * likely_stats - return description of abused likely()/unlikely() + * @min_hits: minimum number of hits + * @percent: maximum percentage correct + * + * When CCAN_LIKELY_DEBUG is defined, likely() and unlikely() trace their + * results: this causes a significant slowdown, but allows analysis of + * whether the branches are labelled correctly. + * + * This function returns a malloc'ed description of the least-correct + * usage of likely() or unlikely(). It ignores places which have been + * called less than @min_hits times, and those which were predicted + * correctly more than @percent of the time. It returns NULL when + * nothing meets those criteria. 
+ * + * Note that this call is destructive; the returned offender is + * removed from the trace so that the next call to likely_stats() will + * return the next-worst likely()/unlikely() usage. + * + * Example: + * // Print every place hit more than twice which was wrong > 5%. + * static void report_stats(void) + * { + * #ifdef CCAN_LIKELY_DEBUG + * const char *bad; + * + * while ((bad = likely_stats(2, 95)) != NULL) { + * printf("Suspicious likely: %s", bad); + * free(bad); + * } + * #endif + * } + */ +const char *likely_stats(unsigned int min_hits, unsigned int percent); +#endif /* CCAN_LIKELY_DEBUG */ +#endif /* CCAN_LIKELY_H */ diff --git a/lib/ccan/likely/test/run-debug.c b/lib/ccan/likely/test/run-debug.c new file mode 100644 index 0000000000..df78619271 --- /dev/null +++ b/lib/ccan/likely/test/run-debug.c @@ -0,0 +1,87 @@ +#define CCAN_LIKELY_DEBUG 1 +#include <ccan/likely/likely.c> +#include <ccan/likely/likely.h> +#include <ccan/tap/tap.h> +#include <stdlib.h> + +static bool one_seems_likely(unsigned int val) +{ + if (likely(val == 1)) + return true; + return false; +} + +static bool one_seems_unlikely(unsigned int val) +{ + if (unlikely(val == 1)) + return true; + return false; +} + +static bool likely_one_unlikely_two(unsigned int val1, unsigned int val2) +{ + /* Same line, check we don't get confused! */ + if (likely(val1 == 1) && unlikely(val2 == 2)) + return true; + return false; +} + +int main(int argc, char *argv[]) +{ + const char *bad; + + plan_tests(13); + + /* Correct guesses. */ + one_seems_likely(1); + ok1(likely_stats(0, 90) == NULL); + one_seems_unlikely(2); + ok1(likely_stats(0, 90) == NULL); + + /* Incorrect guesses. 
*/ + one_seems_likely(0); + one_seems_likely(2); + /* Hasn't been hit 4 times, so this fails */ + ok1(!likely_stats(4, 90)); + bad = likely_stats(3, 90); + ok(strends(bad, "run-debug.c:9:likely(val == 1) correct 33% (1/3)"), + "likely_stats returned %s", bad); + + /* Nothing else above 90% */ + ok1(!likely_stats(0, 90)); + + /* This should get everything. */ + bad = likely_stats(0, 100); + ok(strends(bad, "run-debug.c:16:unlikely(val == 1) correct 100% (1/1)"), + "likely_stats returned %s", bad); + + /* Nothing left (table is actually cleared) */ + ok1(!likely_stats(0, 100)); + + /* Make sure unlikely works */ + one_seems_unlikely(0); + one_seems_unlikely(2); + one_seems_unlikely(1); + + bad = likely_stats(0, 90); + ok(strends(bad, "run-debug.c:16:unlikely(val == 1) correct 66% (2/3)"), + "likely_stats returned %s", bad); + ok1(!likely_stats(0, 100)); + + likely_one_unlikely_two(1, 1); + likely_one_unlikely_two(1, 1); + likely_one_unlikely_two(1, 1); + ok1(!likely_stats(0, 90)); + likely_one_unlikely_two(1, 2); + + bad = likely_stats(0, 90); + ok(strends(bad, "run-debug.c:24:unlikely(val2 == 2) correct 75% (3/4)"), + "likely_stats returned %s", bad); + bad = likely_stats(0, 100); + ok(strends(bad, "run-debug.c:24:likely(val1 == 1) correct 100% (4/4)"), + "likely_stats returned %s", bad); + + ok1(!likely_stats(0, 100)); + + exit(exit_status()); +} diff --git a/lib/ccan/likely/test/run.c b/lib/ccan/likely/test/run.c new file mode 100644 index 0000000000..fa1dc9f6ea --- /dev/null +++ b/lib/ccan/likely/test/run.c @@ -0,0 +1,30 @@ +#include <ccan/likely/likely.c> +#include <ccan/likely/likely.h> +#include <ccan/tap/tap.h> +#include <stdlib.h> + +static bool one_seems_likely(unsigned int val) +{ + if (likely(val == 1)) + return true; + return false; +} + +static bool one_seems_unlikely(unsigned int val) +{ + if (unlikely(val == 1)) + return true; + return false; +} + +int main(int argc, char *argv[]) +{ + plan_tests(4); + + /* Without debug, we can only check that it 
doesn't affect functions. */ + ok1(one_seems_likely(1)); + ok1(!one_seems_likely(2)); + ok1(one_seems_unlikely(1)); + ok1(!one_seems_unlikely(2)); + exit(exit_status()); +} diff --git a/lib/ccan/str/LICENSE b/lib/ccan/str/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/str/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. 
+ + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. 
We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. 
+ + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. 
+ + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. 
+ + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. 
+ + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. 
If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. 
+ + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. 
However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. 
+ +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. 
If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. +It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/lib/ccan/str/_info b/lib/ccan/str/_info new file mode 100644 index 0000000000..ea314dbf79 --- /dev/null +++ b/lib/ccan/str/_info @@ -0,0 +1,52 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * str - string helper routines + * + * This is a grab bag of functions for string operations, designed to enhance + * the standard string.h. + * + * Note that if you define CCAN_STR_DEBUG, you will get extra compile + * checks on common misuses of the following functions (they will now + * be out-of-line, so there is a runtime penalty!). + * + * strstr, strchr, strrchr: + * Return const char * if first argument is const (gcc only). + * + * isalnum, isalpha, isascii, isblank, iscntrl, isdigit, isgraph, + * islower, isprint, ispunct, isspace, isupper, isxdigit: + * Static and runtime check that input is EOF or an *unsigned* + * char, as per C standard (really!). 
+ * + * Example: + * #include <stdio.h> + * #include <ccan/str/str.h> + * + * int main(int argc, char *argv[]) + * { + * if (argv[1] && streq(argv[1], "--verbose")) + * printf("verbose set\n"); + * if (argv[1] && strstarts(argv[1], "--")) + * printf("Some option set\n"); + * if (argv[1] && strends(argv[1], "cow-powers")) + * printf("Magic option set\n"); + * return 0; + * } + * + * License: LGPL (2 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/str/debug.c b/lib/ccan/str/debug.c new file mode 100644 index 0000000000..9ef756766a --- /dev/null +++ b/lib/ccan/str/debug.c @@ -0,0 +1,104 @@ +#include "config.h" +#include <ccan/str/str_debug.h> +#include <assert.h> +#include <ctype.h> +#include <string.h> + +#ifdef CCAN_STR_DEBUG +/* Because we mug the real ones with macros, we need our own wrappers. 
*/ +int str_isalnum(int i) +{ + assert(i >= -1 && i < 256); + return isalnum(i); +} + +int str_isalpha(int i) +{ + assert(i >= -1 && i < 256); + return isalpha(i); +} + +int str_isascii(int i) +{ + assert(i >= -1 && i < 256); + return isascii(i); +} + +#if HAVE_ISBLANK +int str_isblank(int i) +{ + assert(i >= -1 && i < 256); + return isblank(i); +} +#endif + +int str_iscntrl(int i) +{ + assert(i >= -1 && i < 256); + return iscntrl(i); +} + +int str_isdigit(int i) +{ + assert(i >= -1 && i < 256); + return isdigit(i); +} + +int str_isgraph(int i) +{ + assert(i >= -1 && i < 256); + return isgraph(i); +} + +int str_islower(int i) +{ + assert(i >= -1 && i < 256); + return islower(i); +} + +int str_isprint(int i) +{ + assert(i >= -1 && i < 256); + return isprint(i); +} + +int str_ispunct(int i) +{ + assert(i >= -1 && i < 256); + return ispunct(i); +} + +int str_isspace(int i) +{ + assert(i >= -1 && i < 256); + return isspace(i); +} + +int str_isupper(int i) +{ + assert(i >= -1 && i < 256); + return isupper(i); +} + +int str_isxdigit(int i) +{ + assert(i >= -1 && i < 256); + return isxdigit(i); +} + + +char *str_strstr(const char *haystack, const char *needle) +{ + return strstr(haystack, needle); +} + +char *str_strchr(const char *haystack, int c) +{ + return strchr(haystack, c); +} + +char *str_strrchr(const char *haystack, int c) +{ + return strrchr(haystack, c); +} +#endif diff --git a/lib/ccan/str/str.c b/lib/ccan/str/str.c new file mode 100644 index 0000000000..fa9809fbd9 --- /dev/null +++ b/lib/ccan/str/str.c @@ -0,0 +1,12 @@ +#include <ccan/str/str.h> + +size_t strcount(const char *haystack, const char *needle) +{ + size_t i = 0, nlen = strlen(needle); + + while ((haystack = strstr(haystack, needle)) != NULL) { + i++; + haystack += nlen; + } + return i; +} diff --git a/lib/ccan/str/str.h b/lib/ccan/str/str.h new file mode 100644 index 0000000000..ae51cdcc99 --- /dev/null +++ b/lib/ccan/str/str.h @@ -0,0 +1,200 @@ +#ifndef CCAN_STR_H +#define CCAN_STR_H +#include 
"config.h" +#include <string.h> +#include <stdbool.h> +#include <ctype.h> + +/** + * streq - Are two strings equal? + * @a: first string + * @b: second string + * + * This macro is arguably more readable than "!strcmp(a, b)". + * + * Example: + * if (streq(somestring, "")) + * printf("String is empty!\n"); + */ +#define streq(a,b) (strcmp((a),(b)) == 0) + +/** + * strstarts - Does this string start with this prefix? + * @str: string to test + * @prefix: prefix to look for at start of str + * + * Example: + * if (strstarts(somestring, "foo")) + * printf("String %s begins with 'foo'!\n", somestring); + */ +#define strstarts(str,prefix) (strncmp((str),(prefix),strlen(prefix)) == 0) + +/** + * strends - Does this string end with this postfix? + * @str: string to test + * @postfix: postfix to look for at end of str + * + * Example: + * if (strends(somestring, "foo")) + * printf("String %s ends with 'foo'!\n", somestring); + */ +static inline bool strends(const char *str, const char *postfix) +{ + if (strlen(str) < strlen(postfix)) + return false; + + return streq(str + strlen(str) - strlen(postfix), postfix); +} + +/** + * stringify - Turn expression into a string literal + * @expr: any C expression + * + * Example: + * #define PRINT_COND_IF_FALSE(cond) \ + * ((cond) || printf("%s is false!", stringify(cond))) + */ +#define stringify(expr) stringify_1(expr) +/* Double-indirection required to stringify expansions */ +#define stringify_1(expr) #expr + +/** + * strcount - Count number of (non-overlapping) occurrences of a substring.
+ * @haystack: a C string + * @needle: a substring (must be non-empty; an empty needle makes this loop forever) + * + * Example: + * int i; + * i = strcount("aaa aaa", "a"); // i = 6; + * i = strcount("aaa aaa", "ab"); // i = 0; + * i = strcount("aaa aaa", "aa"); // i = 2; + */ +size_t strcount(const char *haystack, const char *needle); + +/** + * cisalnum - isalnum() which takes a char (and doesn't accept EOF) + * @c: a character + * + * Surprisingly, the standard ctype.h isalnum() takes an int, which + * must have the value of EOF (-1) or an unsigned char. This variant + * takes a real char, and doesn't accept EOF. + */ +static inline bool cisalnum(char c) +{ + return isalnum((unsigned char)c); +} +static inline bool cisalpha(char c) +{ + return isalpha((unsigned char)c); +} +static inline bool cisascii(char c) +{ + return isascii((unsigned char)c); +} +#if HAVE_ISBLANK +static inline bool cisblank(char c) +{ + return isblank((unsigned char)c); +} +#endif +static inline bool ciscntrl(char c) +{ + return iscntrl((unsigned char)c); +} +static inline bool cisdigit(char c) +{ + return isdigit((unsigned char)c); +} +static inline bool cisgraph(char c) +{ + return isgraph((unsigned char)c); +} +static inline bool cislower(char c) +{ + return islower((unsigned char)c); +} +static inline bool cisprint(char c) +{ + return isprint((unsigned char)c); +} +static inline bool cispunct(char c) +{ + return ispunct((unsigned char)c); +} +static inline bool cisspace(char c) +{ + return isspace((unsigned char)c); +} +static inline bool cisupper(char c) +{ + return isupper((unsigned char)c); +} +static inline bool cisxdigit(char c) +{ + return isxdigit((unsigned char)c); +} + +#include <ccan/str/str_debug.h> + +/* These checks force things out of line, hence they are under DEBUG. */ +#ifdef CCAN_STR_DEBUG +#include <ccan/build_assert/build_assert.h> + +/* These are commonly misused: they take -1 or an *unsigned* char value.
*/ +#undef isalnum +#undef isalpha +#undef isascii +#undef isblank +#undef iscntrl +#undef isdigit +#undef isgraph +#undef islower +#undef isprint +#undef ispunct +#undef isspace +#undef isupper +#undef isxdigit + +/* You can use a char if char is unsigned. */ +#if HAVE_BUILTIN_TYPES_COMPATIBLE_P && HAVE_TYPEOF +#define str_check_arg_(i) \ + ((i) + BUILD_ASSERT_OR_ZERO(!__builtin_types_compatible_p(typeof(i), \ + char) \ + || (char)255 > 0)) +#else +#define str_check_arg_(i) (i) +#endif + +#define isalnum(i) str_isalnum(str_check_arg_(i)) +#define isalpha(i) str_isalpha(str_check_arg_(i)) +#define isascii(i) str_isascii(str_check_arg_(i)) +#if HAVE_ISBLANK +#define isblank(i) str_isblank(str_check_arg_(i)) +#endif +#define iscntrl(i) str_iscntrl(str_check_arg_(i)) +#define isdigit(i) str_isdigit(str_check_arg_(i)) +#define isgraph(i) str_isgraph(str_check_arg_(i)) +#define islower(i) str_islower(str_check_arg_(i)) +#define isprint(i) str_isprint(str_check_arg_(i)) +#define ispunct(i) str_ispunct(str_check_arg_(i)) +#define isspace(i) str_isspace(str_check_arg_(i)) +#define isupper(i) str_isupper(str_check_arg_(i)) +#define isxdigit(i) str_isxdigit(str_check_arg_(i)) + +#if HAVE_TYPEOF +/* With GNU magic, we can make const-respecting standard string functions. */ +#undef strstr +#undef strchr +#undef strrchr + +/* + 0 is needed to decay array into pointer. 
*/ +#define strstr(haystack, needle) \ + ((typeof((haystack) + 0))str_strstr((haystack), (needle))) +#define strchr(haystack, c) \ + ((typeof((haystack) + 0))str_strchr((haystack), (c))) +#define strrchr(haystack, c) \ + ((typeof((haystack) + 0))str_strrchr((haystack), (c))) +#endif +#endif /* CCAN_STR_DEBUG */ + +#endif /* CCAN_STR_H */ diff --git a/lib/ccan/str/str_debug.h b/lib/ccan/str/str_debug.h new file mode 100644 index 0000000000..6b56477689 --- /dev/null +++ b/lib/ccan/str/str_debug.h @@ -0,0 +1,29 @@ +#ifndef CCAN_STR_DEBUG_H +#define CCAN_STR_DEBUG_H + +/* #define CCAN_STR_DEBUG 1 */ + +#ifdef CCAN_STR_DEBUG +/* Because we mug the real ones with macros, we need our own wrappers. */ +int str_isalnum(int i); +int str_isalpha(int i); +int str_isascii(int i); +#if HAVE_ISBLANK +int str_isblank(int i); +#endif +int str_iscntrl(int i); +int str_isdigit(int i); +int str_isgraph(int i); +int str_islower(int i); +int str_isprint(int i); +int str_ispunct(int i); +int str_isspace(int i); +int str_isupper(int i); +int str_isxdigit(int i); + +char *str_strstr(const char *haystack, const char *needle); +char *str_strchr(const char *s, int c); +char *str_strrchr(const char *s, int c); +#endif /* CCAN_STR_DEBUG */ + +#endif /* CCAN_STR_DEBUG_H */ diff --git a/lib/ccan/str/test/compile_fail-isalnum.c b/lib/ccan/str/test/compile_fail-isalnum.c new file mode 100644 index 0000000000..930defffa0 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isalnum.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isalnum. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. 
*/ + BUILD_ASSERT((char)255 < 0); +#endif + + return isalnum(c); +} diff --git a/lib/ccan/str/test/compile_fail-isalpha.c b/lib/ccan/str/test/compile_fail-isalpha.c new file mode 100644 index 0000000000..2005109829 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isalpha.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isalpha. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isalpha(c); +} diff --git a/lib/ccan/str/test/compile_fail-isascii.c b/lib/ccan/str/test/compile_fail-isascii.c new file mode 100644 index 0000000000..ee55e49974 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isascii.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isascii. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isascii(c); +} diff --git a/lib/ccan/str/test/compile_fail-isblank.c b/lib/ccan/str/test/compile_fail-isblank.c new file mode 100644 index 0000000000..f4cb961d74 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isblank.c @@ -0,0 +1,26 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF || !HAVE_ISBLANK +#error We need typeof to check isblank. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. 
*/ + BUILD_ASSERT((char)255 < 0); +#endif + +#if HAVE_ISBLANK + return isblank(c); +#else + return c; +#endif +} diff --git a/lib/ccan/str/test/compile_fail-iscntrl.c b/lib/ccan/str/test/compile_fail-iscntrl.c new file mode 100644 index 0000000000..bc74146542 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-iscntrl.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check iscntrl. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return iscntrl(c); +} diff --git a/lib/ccan/str/test/compile_fail-isdigit.c b/lib/ccan/str/test/compile_fail-isdigit.c new file mode 100644 index 0000000000..71d1c71433 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isdigit.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isdigit. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isdigit(c); +} diff --git a/lib/ccan/str/test/compile_fail-islower.c b/lib/ccan/str/test/compile_fail-islower.c new file mode 100644 index 0000000000..ca3f9907e5 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-islower.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check islower. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. 
*/ + BUILD_ASSERT((char)255 < 0); +#endif + + return islower(c); +} diff --git a/lib/ccan/str/test/compile_fail-isprint.c b/lib/ccan/str/test/compile_fail-isprint.c new file mode 100644 index 0000000000..6432e41d2b --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isprint.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isprint. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isprint(c); +} diff --git a/lib/ccan/str/test/compile_fail-ispunct.c b/lib/ccan/str/test/compile_fail-ispunct.c new file mode 100644 index 0000000000..5d941fcba6 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-ispunct.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check ispunct. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return ispunct(c); +} diff --git a/lib/ccan/str/test/compile_fail-isspace.c b/lib/ccan/str/test/compile_fail-isspace.c new file mode 100644 index 0000000000..bfee1f89f1 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isspace.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isspace. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. 
*/ + BUILD_ASSERT((char)255 < 0); +#endif + + return isspace(c); +} diff --git a/lib/ccan/str/test/compile_fail-isupper.c b/lib/ccan/str/test/compile_fail-isupper.c new file mode 100644 index 0000000000..4cf9fd3578 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isupper.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isupper. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isupper(c); +} diff --git a/lib/ccan/str/test/compile_fail-isxdigit.c b/lib/ccan/str/test/compile_fail-isxdigit.c new file mode 100644 index 0000000000..65e6006a88 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-isxdigit.c @@ -0,0 +1,22 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_BUILTIN_TYPES_COMPATIBLE_P || !HAVE_TYPEOF +#error We need typeof to check isxdigit. +#endif + char +#else + unsigned char +#endif + c = argv[0][0]; + +#ifdef FAIL + /* Fake fail on unsigned char platforms. */ + BUILD_ASSERT((char)255 < 0); +#endif + + return isxdigit(c); +} diff --git a/lib/ccan/str/test/compile_fail-strchr.c b/lib/ccan/str/test/compile_fail-strchr.c new file mode 100644 index 0000000000..74a7314d06 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-strchr.c @@ -0,0 +1,18 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_TYPEOF + #error We need typeof to check strchr. +#endif +#else + const +#endif + char *ret; + const char *str = "hello"; + + ret = strchr(str, 'l'); + return ret ?
0 : 1; +} diff --git a/lib/ccan/str/test/compile_fail-strrchr.c b/lib/ccan/str/test/compile_fail-strrchr.c new file mode 100644 index 0000000000..ba7d17e031 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-strrchr.c @@ -0,0 +1,18 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_TYPEOF + #error We need typeof to check strrchr. +#endif +#else + const +#endif + char *ret; + const char *str = "hello"; + + ret = strrchr(str, 'l'); + return ret ? 0 : 1; +} diff --git a/lib/ccan/str/test/compile_fail-strstr.c b/lib/ccan/str/test/compile_fail-strstr.c new file mode 100644 index 0000000000..deefef6542 --- /dev/null +++ b/lib/ccan/str/test/compile_fail-strstr.c @@ -0,0 +1,18 @@ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/str.h> + +int main(int argc, char *argv[]) +{ +#ifdef FAIL +#if !HAVE_TYPEOF + #error We need typeof to check strstr. +#endif +#else + const +#endif + char *ret; + const char *str = "hello"; + + ret = strstr(str, "hell"); + return ret ? 0 : 1; +} diff --git a/lib/ccan/str/test/debug.c b/lib/ccan/str/test/debug.c new file mode 100644 index 0000000000..4bd384f2c4 --- /dev/null +++ b/lib/ccan/str/test/debug.c @@ -0,0 +1,5 @@ +/* We can't use the normal "#include the .c file" trick, since this is + contaminated by str.h's macro overrides. So we put it in all tests + like this.
*/ +#define CCAN_STR_DEBUG 1 +#include <ccan/str/debug.c> diff --git a/lib/ccan/str/test/run.c b/lib/ccan/str/test/run.c new file mode 100644 index 0000000000..a15654f8f3 --- /dev/null +++ b/lib/ccan/str/test/run.c @@ -0,0 +1,105 @@ +#include <ccan/str/str.h> +#include <ccan/str/str.c> +#include <stdlib.h> +#include <stdio.h> +#include <ccan/tap/tap.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +static char *substrings[] = { "far", "bar", "baz", "b", "ba", "z", "ar", NULL }; + +#define NUM_SUBSTRINGS (ARRAY_SIZE(substrings) - 1) + +static char *strdup_rev(const char *s) +{ + char *ret = strdup(s); + unsigned int i; + + for (i = 0; i < strlen(s); i++) + ret[i] = s[strlen(s) - i - 1]; + return ret; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j, n; + char *strings[NUM_SUBSTRINGS * NUM_SUBSTRINGS]; + + n = 0; + for (i = 0; i < NUM_SUBSTRINGS; i++) { + for (j = 0; j < NUM_SUBSTRINGS; j++) { + strings[n] = malloc(strlen(substrings[i]) + + strlen(substrings[j]) + 1); + sprintf(strings[n++], "%s%s", + substrings[i], substrings[j]); + } + } + + plan_tests(n * n * 5 + 16); + for (i = 0; i < n; i++) { + for (j = 0; j < n; j++) { + unsigned int k, identical = 0; + char *reva, *revb; + + /* Find first difference. */ + for (k = 0; strings[i][k]==strings[j][k]; k++) { + if (k == strlen(strings[i])) { + identical = 1; + break; + } + } + + if (identical) + ok1(streq(strings[i], strings[j])); + else + ok1(!streq(strings[i], strings[j])); + + /* Postfix test should be equivalent to prefix + * test on reversed string. 
*/ + reva = strdup_rev(strings[i]); + revb = strdup_rev(strings[j]); + + if (!strings[i][k]) { + ok1(strstarts(strings[j], strings[i])); + ok1(strends(revb, reva)); + } else { + ok1(!strstarts(strings[j], strings[i])); + ok1(!strends(revb, reva)); + } + if (!strings[j][k]) { + ok1(strstarts(strings[i], strings[j])); + ok1(strends(reva, revb)); + } else { + ok1(!strstarts(strings[i], strings[j])); + ok1(!strends(reva, revb)); + } + free(reva); + free(revb); + } + } + + for (i = 0; i < n; i++) + free(strings[i]); + + ok1(streq(stringify(NUM_SUBSTRINGS), + "((sizeof(substrings) / sizeof(substrings[0])) - 1)")); + ok1(streq(stringify(ARRAY_SIZE(substrings)), + "(sizeof(substrings) / sizeof(substrings[0]))")); + ok1(streq(stringify(i == 0), "i == 0")); + + ok1(strcount("aaaaaa", "b") == 0); + ok1(strcount("aaaaaa", "a") == 6); + ok1(strcount("aaaaaa", "aa") == 3); + ok1(strcount("aaaaaa", "aaa") == 2); + ok1(strcount("aaaaaa", "aaaa") == 1); + ok1(strcount("aaaaaa", "aaaaa") == 1); + ok1(strcount("aaaaaa", "aaaaaa") == 1); + ok1(strcount("aaa aaa", "b") == 0); + ok1(strcount("aaa aaa", "a") == 6); + ok1(strcount("aaa aaa", "aa") == 2); + ok1(strcount("aaa aaa", "aaa") == 2); + ok1(strcount("aaa aaa", "aaaa") == 0); + ok1(strcount("aaa aaa", "aaaaa") == 0); + + return exit_status(); +} diff --git a/lib/ccan/tally/LICENSE b/lib/ccan/tally/LICENSE new file mode 100644 index 0000000000..cca7fc278f --- /dev/null +++ b/lib/ccan/tally/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. 
+ + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/lib/ccan/tally/_info b/lib/ccan/tally/_info new file mode 100644 index 0000000000..1d67274f5c --- /dev/null +++ b/lib/ccan/tally/_info @@ -0,0 +1,58 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * tally - running tally of integers + * + * The tally module implements simple analysis of a stream of integers. + * Numbers are fed in via tally_add(), and then the mean, median, mode and + * a histogram can be read out. + * + * Example: + * #include <stdio.h> + * #include <err.h> + * #include <ccan/tally/tally.h> + * + * int main(int argc, char *argv[]) + * { + * struct tally *t; + * unsigned int i; + * size_t err; + * ssize_t val; + * char *histogram; + * + * if (argc < 2) + * errx(1, "Usage: %s <number>...\n", argv[0]); + * + * t = tally_new(100); + * for (i = 1; i < argc; i++) + * tally_add(t, atol(argv[i])); + * + * printf("Mean = %zi\n", tally_mean(t)); + * val = tally_approx_median(t, &err); + * printf("Median = %zi (+/- %zu)\n", val, err); + * val = tally_approx_mode(t, &err); + * printf("Mode = %zi (+/- %zu)\n", val, err); + * histogram = tally_histogram(t, 50, 10); + * printf("Histogram:\n%s", histogram); + * free(histogram); + * return 0; + * } + * + * License: LGPL (3 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/build_assert\n"); + printf("ccan/likely\n"); + return 0; + } + + return 1; +} diff --git a/lib/ccan/tally/tally.c b/lib/ccan/tally/tally.c new file mode 100644 index 0000000000..b1839befe3 --- /dev/null +++ b/lib/ccan/tally/tally.c @@ -0,0 +1,490 @@ +#include <ccan/tally/tally.h> +#include <ccan/build_assert/build_assert.h> +#include <ccan/likely/likely.h> +#include <stdint.h> +#include <limits.h> +#include <string.h> +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> + +#define SIZET_BITS (sizeof(size_t)*CHAR_BIT) + +/* We use power of 2 steps. 
I tried being tricky, but it got buggy. */ +struct tally { + ssize_t min, max; + size_t total[2]; + /* This allows limited frequency analysis. */ + unsigned buckets, step_bits; + size_t counts[1 /* Actually: [buckets] */ ]; +}; + +struct tally *tally_new(unsigned buckets) +{ + struct tally *tally; + + /* There is always 1 bucket. */ + if (buckets == 0) + buckets = 1; + + /* Check for overflow. */ + if (buckets && SIZE_MAX / buckets < sizeof(tally->counts[0])) + return NULL; + tally = malloc(sizeof(*tally) + sizeof(tally->counts[0])*(buckets-1)); + if (tally) { + tally->max = ((size_t)1 << (SIZET_BITS - 1)); + tally->min = ~tally->max; + tally->total[0] = tally->total[1] = 0; + tally->buckets = buckets; + tally->step_bits = 0; + memset(tally->counts, 0, sizeof(tally->counts[0])*buckets); + } + return tally; +} + +static unsigned bucket_of(ssize_t min, unsigned step_bits, ssize_t val) +{ + /* Don't over-shift. */ + if (step_bits == SIZET_BITS) + return 0; + assert(step_bits < SIZET_BITS); + return (size_t)(val - min) >> step_bits; +} + +/* Return the min value in bucket b. */ +static ssize_t bucket_min(ssize_t min, unsigned step_bits, unsigned b) +{ + /* Don't over-shift. */ + if (step_bits == SIZET_BITS) + return min; + assert(step_bits < SIZET_BITS); + return min + ((ssize_t)b << step_bits); +} + +/* Does shifting by this many bits truncate the number? */ +static bool shift_overflows(size_t num, unsigned bits) +{ + if (bits == 0) + return false; + + return ((num << bits) >> 1) != (num << (bits - 1)); +} + +/* When min or max change, we may need to shuffle the frequency counts. */ +static void renormalize(struct tally *tally, + ssize_t new_min, ssize_t new_max) +{ + size_t range, spill; + unsigned int i, old_min; + + /* Uninitialized? Don't do anything... */ + if (tally->max < tally->min) + goto update; + + /* If we don't have sufficient range, increase step bits until + * buckets cover entire range of ssize_t anyway. 
*/ + range = (new_max - new_min) + 1; + while (!shift_overflows(tally->buckets, tally->step_bits) + && range > ((size_t)tally->buckets << tally->step_bits)) { + /* Collapse down. */ + for (i = 1; i < tally->buckets; i++) { + tally->counts[i/2] += tally->counts[i]; + tally->counts[i] = 0; + } + tally->step_bits++; + } + + /* Now if minimum has dropped, move buckets up. */ + old_min = bucket_of(new_min, tally->step_bits, tally->min); + memmove(tally->counts + old_min, + tally->counts, + sizeof(tally->counts[0]) * (tally->buckets - old_min)); + memset(tally->counts, 0, sizeof(tally->counts[0]) * old_min); + + /* If we moved boundaries, adjust buckets to that ratio. */ + spill = (tally->min - new_min) % (1 << tally->step_bits); + for (i = 0; i < tally->buckets-1; i++) { + size_t adjust = (tally->counts[i] >> tally->step_bits) * spill; + tally->counts[i] -= adjust; + tally->counts[i+1] += adjust; + } + +update: + tally->min = new_min; + tally->max = new_max; +} + +void tally_add(struct tally *tally, ssize_t val) +{ + ssize_t new_min = tally->min, new_max = tally->max; + bool need_renormalize = false; + + if (val < tally->min) { + new_min = val; + need_renormalize = true; + } + if (val > tally->max) { + new_max = val; + need_renormalize = true; + } + if (need_renormalize) + renormalize(tally, new_min, new_max); + + /* 128-bit arithmetic! If we didn't want exact mean, we could just + * pull it out of counts. 
*/ + if (val > 0 && tally->total[0] + val < tally->total[0]) + tally->total[1]++; + else if (val < 0 && tally->total[0] + val > tally->total[0]) + tally->total[1]--; + tally->total[0] += val; + tally->counts[bucket_of(tally->min, tally->step_bits, val)]++; +} + +size_t tally_num(const struct tally *tally) +{ + size_t i, num = 0; + for (i = 0; i < tally->buckets; i++) + num += tally->counts[i]; + return num; +} + +ssize_t tally_min(const struct tally *tally) +{ + return tally->min; +} + +ssize_t tally_max(const struct tally *tally) +{ + return tally->max; +} + +/* FIXME: Own ccan module please! */ +static unsigned fls64(uint64_t val) +{ +#if HAVE_BUILTIN_CLZL + if (val <= ULONG_MAX) { + /* This is significantly faster! */ + return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0; + } else { +#endif + uint64_t r = 64; + + if (!val) + return 0; + if (!(val & 0xffffffff00000000ull)) { + val <<= 32; + r -= 32; + } + if (!(val & 0xffff000000000000ull)) { + val <<= 16; + r -= 16; + } + if (!(val & 0xff00000000000000ull)) { + val <<= 8; + r -= 8; + } + if (!(val & 0xf000000000000000ull)) { + val <<= 4; + r -= 4; + } + if (!(val & 0xc000000000000000ull)) { + val <<= 2; + r -= 2; + } + if (!(val & 0x8000000000000000ull)) { + val <<= 1; + r -= 1; + } + return r; +#if HAVE_BUILTIN_CLZL + } +#endif +} + +/* This is stolen straight from Hacker's Delight. */ +static uint64_t divlu64(uint64_t u1, uint64_t u0, uint64_t v) +{ + const uint64_t b = 4294967296ULL; // Number base (32 bits). + uint32_t un[4], // Dividend and divisor + vn[2]; // normalized and broken + // up into halfwords. + uint32_t q[2]; // Quotient as halfwords. + uint64_t un1, un0, // Dividend and divisor + vn0; // as fullwords. + uint64_t qhat; // Estimated quotient digit. + uint64_t rhat; // A remainder. + uint64_t p; // Product of two digits. + int64_t s, i, j, t, k; + + if (u1 >= v) // If overflow, return the largest + return (uint64_t)-1; // possible quotient. + + s = 64 - fls64(v); // 0 <= s <= 63. 
+ vn0 = v << s; // Normalize divisor. + vn[1] = vn0 >> 32; // Break divisor up into + vn[0] = vn0 & 0xFFFFFFFF; // two 32-bit halves. + + // Shift dividend left. + un1 = ((u1 << s) | (u0 >> (64 - s))) & (-s >> 63); + un0 = u0 << s; + un[3] = un1 >> 32; // Break dividend up into + un[2] = un1; // four 32-bit halfwords + un[1] = un0 >> 32; // Note: storing into + un[0] = un0; // halfwords truncates. + + for (j = 1; j >= 0; j--) { + // Compute estimate qhat of q[j]. + qhat = (un[j+2]*b + un[j+1])/vn[1]; + rhat = (un[j+2]*b + un[j+1]) - qhat*vn[1]; + again: + if (qhat >= b || qhat*vn[0] > b*rhat + un[j]) { + qhat = qhat - 1; + rhat = rhat + vn[1]; + if (rhat < b) goto again; + } + + // Multiply and subtract. + k = 0; + for (i = 0; i < 2; i++) { + p = qhat*vn[i]; + t = un[i+j] - k - (p & 0xFFFFFFFF); + un[i+j] = t; + k = (p >> 32) - (t >> 32); + } + t = un[j+2] - k; + un[j+2] = t; + + q[j] = qhat; // Store quotient digit. + if (t < 0) { // If we subtracted too + q[j] = q[j] - 1; // much, add back. + k = 0; + for (i = 0; i < 2; i++) { + t = un[i+j] + vn[i] + k; + un[i+j] = t; + k = t >> 32; + } + un[j+2] = un[j+2] + k; + } + } // End j. + + return q[1]*b + q[0]; +} + +static int64_t divls64(int64_t u1, uint64_t u0, int64_t v) +{ + int64_t q, uneg, vneg, diff, borrow; + + uneg = u1 >> 63; // -1 if u < 0. + if (uneg) { // Compute the absolute + u0 = -u0; // value of the dividend u. + borrow = (u0 != 0); + u1 = -u1 - borrow; + } + + vneg = v >> 63; // -1 if v < 0. + v = (v ^ vneg) - vneg; // Absolute value of v. + + if ((uint64_t)u1 >= (uint64_t)v) + goto overflow; + + q = divlu64(u1, u0, v); + + diff = uneg ^ vneg; // Negate q if signs of + q = (q ^ diff) - diff; // u and v differed. + + if ((diff ^ q) < 0 && q != 0) { // If overflow, return the largest + overflow: // possible neg. quotient. 
+ q = 0x8000000000000000ULL; + } + return q; +} + +ssize_t tally_mean(const struct tally *tally) +{ + size_t count = tally_num(tally); + if (!count) + return 0; + + if (sizeof(tally->total[0]) == sizeof(uint32_t)) { + /* Use standard 64-bit arithmetic. */ + int64_t total = tally->total[0] + | (((uint64_t)tally->total[1]) << 32); + return total / count; + } + return divls64(tally->total[1], tally->total[0], count); +} + +ssize_t tally_total(const struct tally *tally, ssize_t *overflow) +{ + if (overflow) { + *overflow = tally->total[1]; + return tally->total[0]; + } + + /* If result is negative, make sure we can represent it. */ + if (tally->total[1] & ((size_t)1 << (SIZET_BITS-1))) { + /* Must have only underflowed once, and must be able to + * represent result at ssize_t. */ + if ((~tally->total[1])+1 != 0 + || (ssize_t)tally->total[0] >= 0) { + /* Underflow, return minimum. */ + return (ssize_t)((size_t)1 << (SIZET_BITS - 1)); + } + } else { + /* Result is positive, must not have overflowed, and must be + * able to represent as ssize_t. */ + if (tally->total[1] || (ssize_t)tally->total[0] < 0) { + /* Overflow. Return maximum. */ + return (ssize_t)~((size_t)1 << (SIZET_BITS - 1)); + } + } + return tally->total[0]; +} + +static ssize_t bucket_range(const struct tally *tally, unsigned b, size_t *err) +{ + ssize_t min, max; + + min = bucket_min(tally->min, tally->step_bits, b); + if (b == tally->buckets - 1) + max = tally->max; + else + max = bucket_min(tally->min, tally->step_bits, b+1) - 1; + + /* FIXME: Think harder about cumulative error; is this enough?. */ + *err = (max - min + 1) / 2; + /* Avoid overflow. 
*/ + return min + (max - min) / 2; +} + +ssize_t tally_approx_median(const struct tally *tally, size_t *err) +{ + size_t count = tally_num(tally), total = 0; + unsigned int i; + + for (i = 0; i < tally->buckets; i++) { + total += tally->counts[i]; + if (total * 2 >= count) + break; + } + return bucket_range(tally, i, err); +} + +ssize_t tally_approx_mode(const struct tally *tally, size_t *err) +{ + unsigned int i, min_best = 0, max_best = 0; + + for (i = 0; i < tally->buckets; i++) { + if (tally->counts[i] > tally->counts[min_best]) { + min_best = max_best = i; + } else if (tally->counts[i] == tally->counts[min_best]) { + max_best = i; + } + } + + /* We can have more than one best, making our error huge. */ + if (min_best != max_best) { + ssize_t min, max; + min = bucket_range(tally, min_best, err); + max = bucket_range(tally, max_best, err); + max += *err; + *err += (size_t)(max - min); + return min + (max - min) / 2; + } + + return bucket_range(tally, min_best, err); +} + +static unsigned get_max_bucket(const struct tally *tally) +{ + unsigned int i; + + for (i = tally->buckets; i > 0; i--) + if (tally->counts[i-1]) + break; + return i; +} + +char *tally_histogram(const struct tally *tally, + unsigned width, unsigned height) +{ + unsigned int i, count, max_bucket, largest_bucket; + struct tally *tmp; + char *graph, *p; + + assert(width >= TALLY_MIN_HISTO_WIDTH); + assert(height >= TALLY_MIN_HISTO_HEIGHT); + + /* Ignore unused buckets. */ + max_bucket = get_max_bucket(tally); + + /* FIXME: It'd be nice to smooth here... */ + if (height >= max_bucket) { + height = max_bucket; + tmp = NULL; + } else { + /* We create a temporary then renormalize so < height. */ + /* FIXME: Antialias properly! 
*/ + tmp = tally_new(tally->buckets); + if (!tmp) + return NULL; + tmp->min = tally->min; + tmp->max = tally->max; + tmp->step_bits = tally->step_bits; + memcpy(tmp->counts, tally->counts, + sizeof(tally->counts[0]) * tmp->buckets); + while ((max_bucket = get_max_bucket(tmp)) >= height) + renormalize(tmp, tmp->min, tmp->max * 2); + /* Restore max */ + tmp->max = tally->max; + tally = tmp; + height = max_bucket; + } + + /* Figure out longest line, for scale. */ + largest_bucket = 0; + for (i = 0; i < tally->buckets; i++) { + if (tally->counts[i] > largest_bucket) + largest_bucket = tally->counts[i]; + } + + p = graph = malloc(height * (width + 1) + 1); + if (!graph) { + free(tmp); + return NULL; + } + + for (i = 0; i < height; i++) { + unsigned covered = 1, row; + + /* People expect minimum at the bottom. */ + row = height - i - 1; + count = (double)tally->counts[row] / largest_bucket * (width-1)+1; + + if (row == 0) + covered = snprintf(p, width, "%zi", tally->min); + else if (row == height - 1) + covered = snprintf(p, width, "%zi", tally->max); + else if (row == bucket_of(tally->min, tally->step_bits, 0)) + *p = '+'; + else + *p = '|'; + + if (covered > width) + covered = width; + p += covered; + + if (count > covered) + count -= covered; + else + count = 0; + + memset(p, '*', count); + p += count; + *p = '\n'; + p++; + } + *p = '\0'; + free(tmp); + return graph; +} diff --git a/lib/ccan/tally/tally.h b/lib/ccan/tally/tally.h new file mode 100644 index 0000000000..650e2656cd --- /dev/null +++ b/lib/ccan/tally/tally.h @@ -0,0 +1,104 @@ +#ifndef CCAN_TALLY_H +#define CCAN_TALLY_H +#include "config.h" +#include <sys/types.h> + +struct tally; + +/** + * tally_new - allocate the tally structure. + * @buckets: the number of frequency buckets. + * + * This allocates a tally structure using malloc(). The greater the value + * of @buckets, the more accurate tally_approx_median() and tally_approx_mode() + * and tally_histogram() will be, but more memory is consumed. 
If you want + * to use tally_histogram(), the optimal bucket value is the same as its + * @height argument. + */ +struct tally *tally_new(unsigned int buckets); + +/** + * tally_add - add a value. + * @tally: the tally structure. + * @val: the value to add. + */ +void tally_add(struct tally *tally, ssize_t val); + +/** + * tally_num - how many times has tally_add been called? + * @tally: the tally structure. + */ +size_t tally_num(const struct tally *tally); + +/** + * tally_min - the minimum value passed to tally_add. + * @tally: the tally structure. + * + * Undefined if tally_num() == 0. + */ +ssize_t tally_min(const struct tally *tally); + +/** + * tally_max - the maximum value passed to tally_add. + * @tally: the tally structure. + * + * Undefined if tally_num() == 0. + */ +ssize_t tally_max(const struct tally *tally); + +/** + * tally_mean - the mean value passed to tally_add. + * @tally: the tally structure. + * + * Undefined if tally_num() == 0, but will not crash. + */ +ssize_t tally_mean(const struct tally *tally); + +/** + * tally_total - the total value passed to tally_add. + * @tally: the tally structure. + * @overflow: the overflow value (or NULL). + * + * If your total can't overflow a ssize_t, you don't need @overflow. + * Otherwise, @overflow is the upper ssize_t, and the return value should + * be treated as the lower size_t (ie. the sign bit is in @overflow). + */ +ssize_t tally_total(const struct tally *tally, ssize_t *overflow); + +/** + * tally_approx_median - the approximate median value passed to tally_add. + * @tally: the tally structure. + * @err: the error in the returned value (ie. real median is +/- @err). + * + * Undefined if tally_num() == 0, but will not crash. Because we + * don't reallocate, we don't store all values, so this median cannot be + * exact. + */ +ssize_t tally_approx_median(const struct tally *tally, size_t *err); + +/** + * tally_approx_mode - the approximate mode value passed to tally_add.
+ * @tally: the tally structure. + * @err: the error in the returned value (ie. real mode is +/- @err). + * + * Undefined if tally_num() == 0, but will not crash. Because we + * don't reallocate, we don't store all values, so this mode cannot be + * exact. It could well be a value which was never passed to tally_add! + */ +ssize_t tally_approx_mode(const struct tally *tally, size_t *err); + +#define TALLY_MIN_HISTO_WIDTH 8 +#define TALLY_MIN_HISTO_HEIGHT 3 + +/** + * tally_histogram - return an ASCII image of the tally_add distribution + * @tally: the tally structure. + * @width: the maximum string width to use (>= TALLY_MIN_HISTO_WIDTH) + * @height: the maximum string height to use (>= TALLY_MIN_HISTO_HEIGHT) + * + * Returns a malloc()ed string which draws a multi-line graph of the + * distribution of values. On out of memory returns NULL. + */ +char *tally_histogram(const struct tally *tally, + unsigned width, unsigned height); +#endif /* CCAN_TALLY_H */ diff --git a/lib/ccan/tally/test/run-bucket_of.c b/lib/ccan/tally/test/run-bucket_of.c new file mode 100644 index 0000000000..5e12725757 --- /dev/null +++ b/lib/ccan/tally/test/run-bucket_of.c @@ -0,0 +1,71 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + unsigned int i, max_step; + ssize_t min, max; + + max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1)); + min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1)); + max_step = sizeof(max)*CHAR_BIT; + + plan_tests(2 + 100 + 10 + 5 + + 2 + 100 + 5 + 4 + + (1 << 7) * (max_step - 7)); + + /* Single step, single bucket == easy. */ + ok1(bucket_of(0, 0, 0) == 0); + + /* Double step, still in first bucket. */ + ok1(bucket_of(0, 1, 0) == 0); + + /* Step 8. */ + for (i = 0; i < 100; i++) + ok1(bucket_of(0, 3, i) == i >> 3); + + /* 10 values in 5 buckets, step 2. */ + for (i = 0; i < 10; i++) + ok1(bucket_of(0, 1, i) == i >> 1); + + /* Extreme cases. 
*/ + ok1(bucket_of(min, 0, min) == 0); + ok1(bucket_of(min, max_step-1, min) == 0); + ok1(bucket_of(min, max_step-1, max) == 1); + ok1(bucket_of(min, max_step, min) == 0); + ok1(bucket_of(min, max_step, max) == 0); + + /* Now, bucket_min() should match: */ + ok1(bucket_min(0, 0, 0) == 0); + + /* Double step, val in first bucket still 0. */ + ok1(bucket_min(0, 1, 0) == 0); + + /* Step 8. */ + for (i = 0; i < 100; i++) + ok1(bucket_min(0, 3, i) == i << 3); + + /* 10 values in 5 buckets, step 2. */ + for (i = 0; i < 5; i++) + ok1(bucket_min(0, 1, i) == i << 1); + + /* Extreme cases. */ + ok1(bucket_min(min, 0, 0) == min); + ok1(bucket_min(min, max_step-1, 0) == min); + ok1(bucket_min(min, max_step-1, 1) == 0); + ok1(bucket_min(min, max_step, 0) == min); + + /* Now, vary step and number of buckets, but bucket_min and bucket_of + * must agree. */ + for (i = 0; i < (1 << 7); i++) { + unsigned int j; + for (j = 0; j < max_step - 7; j++) { + ssize_t val; + + val = bucket_min(-(ssize_t)i, j, i); + ok1(bucket_of(-(ssize_t)i, j, val) == i); + } + } + + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-divlu64.c b/lib/ccan/tally/test/run-divlu64.c new file mode 100644 index 0000000000..057e47432c --- /dev/null +++ b/lib/ccan/tally/test/run-divlu64.c @@ -0,0 +1,31 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + unsigned int i, j; + + plan_tests(5985); + /* Simple tests. 
*/ + for (i = 0; i < 127; i++) { + uint64_t u1, u0; + if (i < 64) { + u1 = 0; + u0 = 1ULL << i; + j = 0; + } else { + u1 = 1ULL << (i - 64); + u0 = 0; + j = i - 63; + } + for (; j < 63; j++) { + uint64_t answer; + if (j > i) + answer = 0; + else + answer = 1ULL << (i - j); + ok1(divlu64(u1, u0, 1ULL << j) == answer); + } + } + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-histogram.c b/lib/ccan/tally/test/run-histogram.c new file mode 100644 index 0000000000..a9894ecd85 --- /dev/null +++ b/lib/ccan/tally/test/run-histogram.c @@ -0,0 +1,108 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + int i; + struct tally *tally; + char *graph, *p; + + plan_tests(100 + 1 + 10 + 1 + 100 + 1 + 10 + 1 + 10 * 2 + 1); + + /* Uniform distribution, easy. */ + tally = tally_new(100); + for (i = 0; i < 100; i++) + tally_add(tally, i); + + /* 1:1 height. */ + graph = p = tally_histogram(tally, 20, 100); + for (i = 0; i < 100; i++) { + char *eol = strchr(p, '\n'); + + /* We expect it filled all the way to the end. */ + ok1(eol - p == 20); + p = eol + 1; + } + ok1(!*p); + free(graph); + + /* Reduced height. */ + graph = p = tally_histogram(tally, 20, 10); + for (i = 0; i < 10; i++) { + char *eol = strchr(p, '\n'); + + /* First one can be truncated (bucket aliasing) */ + if (eol) { + ok1(eol - p == 20 || (eol - p < 20 && i == 0)); + } else + /* We should, at worst, half-fill graph */ + ok1(i > 5); + + if (eol) + p = eol + 1; + } + ok1(!*p); + free(graph); + + /* Enlarged height (gets capped). */ + graph = p = tally_histogram(tally, 20, 1000); + for (i = 0; i < 100; i++) { + char *eol = strchr(p, '\n'); + /* We expect it filled all the way to the end. */ + ok1(eol - p == 20); + p = eol + 1; + } + ok1(!*p); + free(graph); + free(tally); + + /* Distinctive increasing pattern. 
*/ + tally = tally_new(10); + for (i = 0; i < 10; i++) { + unsigned int j; + for (j = 0; j <= i; j++) + tally_add(tally, i); + } + + graph = p = tally_histogram(tally, 10, 10); + for (i = 0; i < 10; i++) { + char *eol = strchr(p, '\n'); + ok1(eol - p == 10 - i); + p = eol + 1; + } + ok1(!*p); + diag("Here's the pretty: %s", graph); + free(graph); + free(tally); + + /* With negative values. */ + tally = tally_new(10); + for (i = 0; i < 10; i++) { + tally_add(tally, i - 5); + } + + graph = p = tally_histogram(tally, 10, 10); + for (i = 0; i < 10; i++) { + char *eol = strchr(p, '\n'); + + /* We expect it filled all way to the end. */ + ok1(eol - p == 10); + + /* Check min/max labels. */ + if (i == 0) + ok1(strncmp(p, "4*", 2) == 0); + else if (i == 9) + ok1(strncmp(p, "-5*", 3) == 0); + else if (i == 4) + ok1(p[0] == '+'); /* 0 marker */ + else + ok1(p[0] == '|'); + p = eol + 1; + } + ok1(!*p); + diag("Here's the pretty: %s", graph); + free(graph); + free(tally); + + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-mean.c b/lib/ccan/tally/test/run-mean.c new file mode 100644 index 0000000000..b43dea6b28 --- /dev/null +++ b/lib/ccan/tally/test/run-mean.c @@ -0,0 +1,30 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + int i; + struct tally *tally = tally_new(0); + ssize_t min, max; + + max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1)); + min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1)); + + plan_tests(100 + 100); + /* Simple mean test: should always be 0. */ + for (i = 0; i < 100; i++) { + tally_add(tally, i); + tally_add(tally, -i); + ok1(tally_mean(tally) == 0); + } + + /* Works for big values too... 
*/ + for (i = 0; i < 100; i++) { + tally_add(tally, max - i); + tally_add(tally, min + 1 + i); + ok1(tally_mean(tally) == 0); + } + + free(tally); + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-median.c b/lib/ccan/tally/test/run-median.c new file mode 100644 index 0000000000..b12fd8a021 --- /dev/null +++ b/lib/ccan/tally/test/run-median.c @@ -0,0 +1,46 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + int i; + struct tally *tally = tally_new(100); + ssize_t min, max, median; + size_t err; + + max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1)); + min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1)); + + plan_tests(100*2 + 100*2 + 100*2); + /* Simple median test: should always be around 0. */ + for (i = 0; i < 100; i++) { + tally_add(tally, i); + tally_add(tally, -i); + median = tally_approx_median(tally, &err); + ok1(err <= 4); + ok1(median - (ssize_t)err <= 0 && median + (ssize_t)err >= 0); + } + + /* Works for big values too... */ + for (i = 0; i < 100; i++) { + tally_add(tally, max - i); + tally_add(tally, min + 1 + i); + median = tally_approx_median(tally, &err); + /* Error should be < 100th of max - min. */ + ok1(err <= max / 100 * 2); + ok1(median - (ssize_t)err <= 0 && median + (ssize_t)err >= 0); + } + free(tally); + + tally = tally_new(10); + for (i = 0; i < 100; i++) { + tally_add(tally, i); + median = tally_approx_median(tally, &err); + ok1(err <= i / 10 + 1); + ok1(median - (ssize_t)err <= i/2 + && median + (ssize_t)err >= i/2); + } + free(tally); + + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-min-max.c b/lib/ccan/tally/test/run-min-max.c new file mode 100644 index 0000000000..c92f6d382a --- /dev/null +++ b/lib/ccan/tally/test/run-min-max.c @@ -0,0 +1,21 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + int i; + struct tally *tally = tally_new(0); + + plan_tests(100 * 4); + /* Test max, min and num. 
*/ + for (i = 0; i < 100; i++) { + tally_add(tally, i); + ok1(tally_num(tally) == i*2 + 1); + tally_add(tally, -i); + ok1(tally_num(tally) == i*2 + 2); + ok1(tally_max(tally) == i); + ok1(tally_min(tally) == -i); + } + free(tally); + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-mode.c b/lib/ccan/tally/test/run-mode.c new file mode 100644 index 0000000000..cd2f230443 --- /dev/null +++ b/lib/ccan/tally/test/run-mode.c @@ -0,0 +1,46 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + int i; + struct tally *tally = tally_new(100); + ssize_t min, max, mode; + size_t err; + + max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1)); + min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1)); + + plan_tests(100 + 50 + 100 + 100 + 10); + /* Simple mode test: should always be around 0 (we add that twice). */ + for (i = 0; i < 100; i++) { + tally_add(tally, i); + tally_add(tally, -i); + mode = tally_approx_mode(tally, &err); + if (i < 50) + ok1(err == 0); + ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0); + } + + /* Works for big values too... 
*/ + for (i = 0; i < 100; i++) { + tally_add(tally, max - i); + tally_add(tally, min + 1 + i); + mode = tally_approx_mode(tally, &err); + ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0); + } + free(tally); + + tally = tally_new(10); + tally_add(tally, 0); + for (i = 0; i < 100; i++) { + tally_add(tally, i); + mode = tally_approx_mode(tally, &err); + if (i < 10) + ok1(err == 0); + ok1(mode - (ssize_t)err <= 0 && mode + (ssize_t)err >= 0); + } + + free(tally); + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-renormalize.c b/lib/ccan/tally/test/run-renormalize.c new file mode 100644 index 0000000000..8fe9dbce32 --- /dev/null +++ b/lib/ccan/tally/test/run-renormalize.c @@ -0,0 +1,26 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + struct tally *tally = tally_new(2); + + plan_tests(4); + tally->min = 0; + tally->max = 0; + tally->counts[0] = 1; + + /* This renormalize should do nothing. */ + renormalize(tally, 0, 1); + ok1(tally->counts[0] == 1); + ok1(tally->counts[1] == 0); + tally->counts[1]++; + + /* This renormalize should collapse both into bucket 0. */ + renormalize(tally, 0, 3); + ok1(tally->counts[0] == 2); + ok1(tally->counts[1] == 0); + + free(tally); + return exit_status(); +} diff --git a/lib/ccan/tally/test/run-total.c b/lib/ccan/tally/test/run-total.c new file mode 100644 index 0000000000..d7d73e58a5 --- /dev/null +++ b/lib/ccan/tally/test/run-total.c @@ -0,0 +1,56 @@ +#include <ccan/tally/tally.c> +#include <ccan/tap/tap.h> + +int main(void) +{ + struct tally *tally; + ssize_t total, overflow; + ssize_t min, max; + + max = (ssize_t)~(1ULL << (sizeof(max)*CHAR_BIT - 1)); + min = (ssize_t)(1ULL << (sizeof(max)*CHAR_BIT - 1)); + + plan_tests(15); + + /* Simple case. */ + tally = tally_new(0); + tally_add(tally, min); + ok1(tally_total(tally, NULL) == min); + ok1(tally_total(tally, &overflow) == min); + ok1(overflow == -1); + + /* Underflow. 
*/ + tally_add(tally, min); + total = tally_total(tally, &overflow); + ok1(overflow == -1); + ok1((size_t)total == 0); + ok1(tally_total(tally, NULL) == min); + free(tally); + + /* Simple case. */ + tally = tally_new(0); + tally_add(tally, max); + ok1(tally_total(tally, NULL) == max); + ok1(tally_total(tally, &overflow) == max); + ok1(overflow == 0); + + /* Overflow into sign bit... */ + tally_add(tally, max); + total = tally_total(tally, &overflow); + ok1(overflow == 0); + ok1((size_t)total == (size_t)-2); + ok1(tally_total(tally, NULL) == max); + + /* Overflow into upper size_t. */ + tally_add(tally, max); + total = tally_total(tally, &overflow); + ok1(overflow == 1); + if (sizeof(size_t) == 4) + ok1((size_t)total == 0x7FFFFFFD); + else if (sizeof(size_t) == 8) + ok1((size_t)total == 0x7FFFFFFFFFFFFFFDULL); + ok1(tally_total(tally, NULL) == max); + free(tally); + + return exit_status(); +} diff --git a/lib/ccan/typesafe_cb/LICENSE b/lib/ccan/typesafe_cb/LICENSE new file mode 100644 index 0000000000..5522aa5f33 --- /dev/null +++ b/lib/ccan/typesafe_cb/LICENSE @@ -0,0 +1,508 @@ + + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. 
+ + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations +below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. 
Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. 
+ + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it +becomes a de-facto standard. To achieve this, non-free programs must +be allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". 
+ + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control +compilation and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. 
+ + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. 
But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at least + three years, to give the same user the materials specified in + Subsection 6a, above, for a charge no more than the cost of + performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply, and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License +may add an explicit geographical distribution limitation excluding those +countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms +of the ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
+It is safest to attach them to the start of each source file to most +effectively convey the exclusion of warranty; and each file should +have at least the "copyright" line and a pointer to where the full +notice is found. + + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or +your school, if any, to sign a "copyright disclaimer" for the library, +if necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James + Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/lib/ccan/typesafe_cb/_info b/lib/ccan/typesafe_cb/_info new file mode 100644 index 0000000000..4f4570afc9 --- /dev/null +++ b/lib/ccan/typesafe_cb/_info @@ -0,0 +1,151 @@ +#include <stdio.h> +#include <string.h> +#include "config.h" + +/** + * typesafe_cb - macros for safe callbacks. + * + * The basis of the typesafe_cb header is typesafe_cb_cast(): a + * conditional cast macro. 
If an expression exactly matches a given + * type, it is cast to the target type, otherwise it is left alone. + * + * This allows us to create functions which take a small number of + * specific types, rather than being forced to use a void *. In + * particular, it is useful for creating typesafe callbacks as the + * helpers typesafe_cb(), typesafe_cb_preargs() and + * typesafe_cb_postargs() demonstrate. + * + * The standard way of passing arguments to callback functions in C is + * to use a void pointer, which the callback then casts back to the + * expected type. This unfortunately subverts the type checking the + * compiler would perform if it were a direct call. Here's an example: + * + * static void my_callback(void *_obj) + * { + * struct obj *obj = _obj; + * ... + * } + * ... + * register_callback(my_callback, &my_obj); + * + * If we wanted to use the natural type for my_callback (ie. "void + * my_callback(struct obj *obj)"), we could make register_callback() + * take a void * as its first argument, but this would subvert all + * type checking. We really want register_callback() to accept only + * the exactly correct function type to match the argument, or a + * function which takes a void *. + * + * This is where typesafe_cb() comes in: it uses typesafe_cb_cast() to + * cast the callback function if it matches the argument type: + * + * void _register_callback(void (*cb)(void *arg), void *arg); + * #define register_callback(cb, arg) \ + * _register_callback(typesafe_cb(void, void *, (cb), (arg)), \ + * (arg)) + * + * On compilers which don't support the extensions required, + * typesafe_cb_cast() and friends become an unconditional cast, so your + * code will compile but you won't get type checking. + * + * Example: + * #include <ccan/typesafe_cb/typesafe_cb.h> + * #include <stdlib.h> + * #include <stdio.h> + * + * // Generic callback infrastructure.
+ * struct callback { + * struct callback *next; + * int value; + * int (*callback)(int value, void *arg); + * void *arg; + * }; + * static struct callback *callbacks; + * + * static void _register_callback(int value, int (*cb)(int, void *), + * void *arg) + * { + * struct callback *new = malloc(sizeof(*new)); + * new->next = callbacks; + * new->value = value; + * new->callback = cb; + * new->arg = arg; + * callbacks = new; + * } + * #define register_callback(value, cb, arg) \ + * _register_callback(value, \ + * typesafe_cb_preargs(int, void *, \ + * (cb), (arg), int),\ + * (arg)) + * + * static struct callback *find_callback(int value) + * { + * struct callback *i; + * + * for (i = callbacks; i; i = i->next) + * if (i->value == value) + * return i; + * return NULL; + * } + * + * // Define several silly callbacks. Note they don't use void *! + * #define DEF_CALLBACK(name, op) \ + * static int name(int val, int *arg) \ + * { \ + * printf("%s", #op); \ + * return val op *arg; \ + * } + * DEF_CALLBACK(multiply, *); + * DEF_CALLBACK(add, +); + * DEF_CALLBACK(divide, /); + * DEF_CALLBACK(sub, -); + * DEF_CALLBACK(or, |); + * DEF_CALLBACK(and, &); + * DEF_CALLBACK(xor, ^); + * DEF_CALLBACK(assign, =); + * + * // Silly game to find the longest chain of values. + * int main(int argc, char *argv[]) + * { + * int i, run = 1, num = argv[1] ? atoi(argv[1]) : 0; + * + * for (i = 1; i < 1024;) { + * // Since run is an int, compiler checks "add" does too. 
+ * register_callback(i++, add, &run); + * register_callback(i++, divide, &run); + * register_callback(i++, sub, &run); + * register_callback(i++, multiply, &run); + * register_callback(i++, or, &run); + * register_callback(i++, and, &run); + * register_callback(i++, xor, &run); + * register_callback(i++, assign, &run); + * } + * + * printf("%i ", num); + * while (run < 56) { + * struct callback *cb = find_callback(num % i); + * if (!cb) { + * printf("-> STOP\n"); + * return 1; + * } + * num = cb->callback(num, cb->arg); + * printf("->%i ", num); + * run++; + * } + * printf("-> Winner!\n"); + * return 0; + * } + * + * License: LGPL (2 or any later version) + * Author: Rusty Russell <rusty@rustcorp.com.au> + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + return 0; + } + + return 1; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-cast_if_any.c b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_any.c new file mode 100644 index 0000000000..dfb51167ff --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_any.c @@ -0,0 +1,42 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +struct foo { + int x; +}; + +struct bar { + int x; +}; + +struct baz { + int x; +}; + +struct any { + int x; +}; + +struct other { + int x; +}; + +static void take_any(struct any *any) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + struct other +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_if_type is a noop." 
+#endif +#else + struct foo +#endif + *arg = NULL; + take_any(cast_if_any(struct any *, arg, arg, + struct foo *, struct bar *, struct baz *)); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type-promotable.c b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type-promotable.c new file mode 100644 index 0000000000..11d42f4c6b --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type-promotable.c @@ -0,0 +1,23 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdbool.h> + +static void _set_some_value(void *val) +{ +} + +#define set_some_value(expr) \ + _set_some_value(typesafe_cb_cast(void *, long, (expr))) + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + bool x = 0; +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." +#endif +#else + long x = 0; +#endif + set_some_value(x); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type.c b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type.c new file mode 100644 index 0000000000..610793514f --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-cast_if_type.c @@ -0,0 +1,25 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> + +void _set_some_value(void *val); + +void _set_some_value(void *val) +{ +} + +#define set_some_value(expr) \ + _set_some_value(cast_if_type(void *, (expr), (expr), unsigned long)) + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + int x = 0; + set_some_value(x); +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_if_type is a noop." 
+#endif +#else + void *p = 0; + set_some_value(p); +#endif + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb-int.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb-int.c new file mode 100644 index 0000000000..c4033364d4 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb-int.c @@ -0,0 +1,27 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +void _callback(void (*fn)(void *arg), void *arg); +void _callback(void (*fn)(void *arg), void *arg) +{ + fn(arg); +} + +/* Callback is set up to warn if arg isn't a pointer (since it won't + * pass cleanly to _callback's second arg). */ +#define callback(fn, arg) \ + _callback(typesafe_cb(void, (fn), (arg)), (arg)) + +void my_callback(int something); +void my_callback(int something) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + /* This fails due to arg, not due to cast. */ + callback(my_callback, 100); +#endif + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb.c new file mode 100644 index 0000000000..81e36d7b87 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb.c @@ -0,0 +1,34 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, void *, (cb), (arg)), (arg)) + +static void my_callback(char *p) +{ +} + +int main(int argc, char *argv[]) +{ + char str[] = "hello world"; +#ifdef FAIL + int *p; +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." +#endif +#else + char *p; +#endif + p = NULL; + + /* This should work always.
*/ + register_callback(my_callback, str); + + /* This will fail with FAIL defined */ + register_callback(my_callback, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast-multi.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast-multi.c new file mode 100644 index 0000000000..62b5f91e18 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast-multi.c @@ -0,0 +1,43 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +struct foo { + int x; +}; + +struct bar { + int x; +}; + +struct baz { + int x; +}; + +struct any { + int x; +}; + +struct other { + int x; +}; + +static void take_any(struct any *any) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + struct other +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." +#endif +#else + struct foo +#endif + *arg = NULL; + take_any(typesafe_cb_cast3(struct any *, + struct foo *, struct bar *, struct baz *, + arg)); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast.c new file mode 100644 index 0000000000..d2e6f2ab40 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_cast.c @@ -0,0 +1,25 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> + +void _set_some_value(void *val); + +void _set_some_value(void *val) +{ +} + +#define set_some_value(expr) \ + _set_some_value(typesafe_cb_cast(void *, unsigned long, (expr))) + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + int x = 0; + set_some_value(x); +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." 
+#endif +#else + void *p = 0; + set_some_value(p); +#endif + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_exact.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_exact.c new file mode 100644 index 0000000000..0f61d5decd --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_exact.c @@ -0,0 +1,33 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +static void _register_callback(void (*cb)(void *arg), const void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb_exact(void, (cb), (arg)), (arg)) + +static void my_callback(const char *p) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + char *p; +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if cast_if_type is a noop." +#endif +#else + const char *p; +#endif + p = NULL; + + /* This should work always. */ + register_callback(my_callback, (const char *)"hello world"); + + /* This will fail with FAIL defined */ + register_callback(my_callback, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_postargs.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_postargs.c new file mode 100644 index 0000000000..7d3530851d --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_postargs.c @@ -0,0 +1,27 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +static void _register_callback(void (*cb)(void *arg, int x), void *arg) +{ +} +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb_postargs(void, void *, (cb), (arg), int), (arg)) + +static void my_callback(char *p, int x) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + int *p; +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." 
+#endif +#else + char *p; +#endif + p = NULL; + register_callback(my_callback, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_preargs.c b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_preargs.c new file mode 100644 index 0000000000..bd55c6722c --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_fail-typesafe_cb_preargs.c @@ -0,0 +1,28 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +static void _register_callback(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb_preargs(void, void *, (cb), (arg), int), (arg)) + +static void my_callback(int x, char *p) +{ +} + +int main(int argc, char *argv[]) +{ +#ifdef FAIL + int *p; +#if !HAVE_TYPEOF||!HAVE_BUILTIN_CHOOSE_EXPR||!HAVE_BUILTIN_TYPES_COMPATIBLE_P +#error "Unfortunately we don't fail if typesafe_cb_cast is a noop." +#endif +#else + char *p; +#endif + p = NULL; + register_callback(my_callback, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-cast_if_any.c b/lib/ccan/typesafe_cb/test/compile_ok-cast_if_any.c new file mode 100644 index 0000000000..e8f3c49406 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-cast_if_any.c @@ -0,0 +1,41 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +struct foo { + int x; +}; + +struct bar { + int x; +}; + +struct baz { + int x; +}; + +struct any { + int x; +}; + +static void take_any(struct any *any) +{ +} + +int main(int argc, char *argv[]) +{ +#if HAVE_TYPEOF + /* Otherwise we get unused warnings for these. 
*/ + struct foo *foo = NULL; + struct bar *bar = NULL; + struct baz *baz = NULL; +#endif + struct other *arg = NULL; + + take_any(cast_if_any(struct any *, arg, foo, + struct foo *, struct bar *, struct baz *)); + take_any(cast_if_any(struct any *, arg, bar, + struct foo *, struct bar *, struct baz *)); + take_any(cast_if_any(struct any *, arg, baz, + struct foo *, struct bar *, struct baz *)); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-NULL.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-NULL.c new file mode 100644 index 0000000000..265de8b14e --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-NULL.c @@ -0,0 +1,17 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* NULL args for callback function should be OK for normal and _def. */ + +static void _register_callback(void (*cb)(const void *arg), const void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, const void *, (cb), (arg)), (arg)) + +int main(int argc, char *argv[]) +{ + register_callback(NULL, "hello world"); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-const.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-const.c new file mode 100644 index 0000000000..7c2d62ef23 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-const.c @@ -0,0 +1,50 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* const args in callbacks should be OK. 
*/ + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, (cb), (arg)), (arg)) + +#define register_callback_def(cb, arg) \ + _register_callback(typesafe_cb_def(void, (cb), (arg)), (arg)) + +static void _register_callback_pre(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback_pre(cb, arg) \ + _register_callback_pre(typesafe_cb_preargs(void, (cb), (arg), int), (arg)) + +static void _register_callback_post(void (*cb)(void *arg, int x), void *arg) +{ +} + +#define register_callback_post(cb, arg) \ + _register_callback_post(typesafe_cb_postargs(void, (cb), (arg), int), (arg)) + +static void my_callback(const char *p) +{ +} + +static void my_callback_pre(int x, /*const*/ char *p) +{ +} + +static void my_callback_post(/*const*/ char *p, int x) +{ +} + +int main(int argc, char *argv[]) +{ + char p[] = "hello world"; + register_callback(my_callback, p); + register_callback_def(my_callback, p); + register_callback_pre(my_callback_pre, p); + register_callback_post(my_callback_post, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-undefined.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-undefined.c new file mode 100644 index 0000000000..aa50bad6a9 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-undefined.c @@ -0,0 +1,49 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* const args in callbacks should be OK. 
*/ + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, void *, (cb), (arg)), (arg)) + +static void _register_callback_pre(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback_pre(cb, arg) \ + _register_callback_pre(typesafe_cb_preargs(void, void *, (cb), (arg), int), (arg)) + +static void _register_callback_post(void (*cb)(void *arg, int x), void *arg) +{ +} + +#define register_callback_post(cb, arg) \ + _register_callback_post(typesafe_cb_postargs(void, void *, (cb), (arg), int), (arg)) + +struct undefined; + +static void my_callback(struct undefined *undef) +{ +} + +static void my_callback_pre(int x, struct undefined *undef) +{ +} + +static void my_callback_post(struct undefined *undef, int x) +{ +} + +int main(int argc, char *argv[]) +{ + struct undefined *handle = NULL; + + register_callback(my_callback, handle); + register_callback_pre(my_callback_pre, handle); + register_callback_post(my_callback_post, handle); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-vars.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-vars.c new file mode 100644 index 0000000000..f6a2bfecbc --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-vars.c @@ -0,0 +1,52 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* const args in callbacks should be OK. 
*/ + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, void *, (cb), (arg)), (arg)) + +static void _register_callback_pre(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback_pre(cb, arg) \ + _register_callback_pre(typesafe_cb_preargs(void, void *, (cb), (arg), int), (arg)) + +static void _register_callback_post(void (*cb)(void *arg, int x), void *arg) +{ +} + +#define register_callback_post(cb, arg) \ + _register_callback_post(typesafe_cb_postargs(void, void *, (cb), (arg), int), (arg)) + +struct undefined; + +static void my_callback(struct undefined *undef) +{ +} + +static void my_callback_pre(int x, struct undefined *undef) +{ +} + +static void my_callback_post(struct undefined *undef, int x) +{ +} + +int main(int argc, char *argv[]) +{ + struct undefined *handle = NULL; + void (*cb)(struct undefined *undef) = my_callback; + void (*pre)(int x, struct undefined *undef) = my_callback_pre; + void (*post)(struct undefined *undef, int x) = my_callback_post; + + register_callback(cb, handle); + register_callback_pre(pre, handle); + register_callback_post(post, handle); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-volatile.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-volatile.c new file mode 100644 index 0000000000..3fcb1ff656 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb-volatile.c @@ -0,0 +1,47 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* volatile args in callbacks should be OK. 
*/ + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, (cb), (arg)), (arg)) + +static void _register_callback_pre(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback_pre(cb, arg) \ + _register_callback_pre(typesafe_cb_preargs(void, (cb), (arg), int), (arg)) + +static void _register_callback_post(void (*cb)(void *arg, int x), void *arg) +{ +} + +#define register_callback_post(cb, arg) \ + _register_callback_post(typesafe_cb_postargs(void, (cb), (arg), int), (arg)) + +static void my_callback(volatile char *p) +{ +} + +/* FIXME: Can't handle volatile for these */ +static void my_callback_pre(int x, /* volatile */ char *p) +{ +} + +static void my_callback_post(/* volatile */ char *p, int x) +{ +} + +int main(int argc, char *argv[]) +{ + char p[] = "hello world"; + register_callback(my_callback, p); + register_callback_pre(my_callback_pre, p); + register_callback_post(my_callback_post, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_cast.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_cast.c new file mode 100644 index 0000000000..b7f21dc094 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_cast.c @@ -0,0 +1,41 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +struct foo { + int x; +}; + +struct bar { + int x; +}; + +struct baz { + int x; +}; + +struct any { + int x; +}; + +static void take_any(struct any *any) +{ +} + +int main(int argc, char *argv[]) +{ + /* Otherwise we get unused warnings for these. 
*/ + struct foo *foo = NULL; + struct bar *bar = NULL; + struct baz *baz = NULL; + + take_any(typesafe_cb_cast3(struct any *, + struct foo *, struct bar *, struct baz *, + foo)); + take_any(typesafe_cb_cast3(struct any *, + struct foo *, struct bar *, struct baz *, + bar)); + take_any(typesafe_cb_cast3(struct any *, + struct foo *, struct bar *, struct baz *, + baz)); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_def-const.c b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_def-const.c new file mode 100644 index 0000000000..01e090f1dc --- /dev/null +++ b/lib/ccan/typesafe_cb/test/compile_ok-typesafe_cb_def-const.c @@ -0,0 +1,46 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <stdlib.h> + +/* const args in callbacks should be OK. */ + +static void _register_callback(void (*cb)(void *arg), void *arg) +{ +} + +#define register_callback(cb, arg) \ + _register_callback(typesafe_cb(void, (cb), (arg)), (arg)) + +static void _register_callback_pre(void (*cb)(int x, void *arg), void *arg) +{ +} + +#define register_callback_pre(cb, arg) \ + _register_callback_pre(typesafe_cb_preargs(void, (cb), (arg), int), (arg)) + +static void _register_callback_post(void (*cb)(void *arg, int x), void *arg) +{ +} + +#define register_callback_post(cb, arg) \ + _register_callback_post(typesafe_cb_postargs(void, (cb), (arg), int), (arg)) + +static void my_callback(const char *p) +{ +} + +static void my_callback_pre(int x, /*const*/ char *p) +{ +} + +static void my_callback_post(/*const*/ char *p, int x) +{ +} + +int main(int argc, char *argv[]) +{ + char p[] = "hello world"; + register_callback(my_callback, p); + register_callback_pre(my_callback_pre, p); + register_callback_post(my_callback_post, p); + return 0; +} diff --git a/lib/ccan/typesafe_cb/test/run.c b/lib/ccan/typesafe_cb/test/run.c new file mode 100644 index 0000000000..116e7d1946 --- /dev/null +++ b/lib/ccan/typesafe_cb/test/run.c @@ -0,0 +1,109 @@ +#include <ccan/typesafe_cb/typesafe_cb.h> 
+#include <string.h> +#include <stdint.h> +#include <ccan/tap/tap.h> + +static char dummy = 0; + +/* The example usage. */ +static void _set_some_value(void *val) +{ + ok1(val == &dummy); +} + +#define set_some_value(expr) \ + _set_some_value(typesafe_cb_cast(void *, unsigned long, (expr))) + +static void _callback_onearg(void (*fn)(void *arg), void *arg) +{ + fn(arg); +} + +static void _callback_preargs(void (*fn)(int a, int b, void *arg), void *arg) +{ + fn(1, 2, arg); +} + +static void _callback_postargs(void (*fn)(void *arg, int a, int b), void *arg) +{ + fn(arg, 1, 2); +} + +#define callback_onearg(cb, arg) \ + _callback_onearg(typesafe_cb(void, void *, (cb), (arg)), (arg)) + +#define callback_preargs(cb, arg) \ + _callback_preargs(typesafe_cb_preargs(void, void *, (cb), (arg), int, int), (arg)) + +#define callback_postargs(cb, arg) \ + _callback_postargs(typesafe_cb_postargs(void, void *, (cb), (arg), int, int), (arg)) + +static void my_callback_onearg(char *p) +{ + ok1(strcmp(p, "hello world") == 0); +} + +static void my_callback_preargs(int a, int b, char *p) +{ + ok1(a == 1); + ok1(b == 2); + ok1(strcmp(p, "hello world") == 0); +} + +static void my_callback_postargs(char *p, int a, int b) +{ + ok1(a == 1); + ok1(b == 2); + ok1(strcmp(p, "hello world") == 0); +} + +/* This is simply a compile test; we promised typesafe_cb_cast can be in a + * static initializer. 
*/ +struct callback_onearg +{ + void (*fn)(void *arg); + const void *arg; +}; + +struct callback_onearg cb_onearg += { typesafe_cb(void, void *, my_callback_onearg, (char *)(intptr_t)"hello world"), + "hello world" }; + +struct callback_preargs +{ + void (*fn)(int a, int b, void *arg); + const void *arg; +}; + +struct callback_preargs cb_preargs += { typesafe_cb_preargs(void, void *, my_callback_preargs, + (char *)(intptr_t)"hi", int, int), "hi" }; + +struct callback_postargs +{ + void (*fn)(void *arg, int a, int b); + const void *arg; +}; + +struct callback_postargs cb_postargs += { typesafe_cb_postargs(void, void *, my_callback_postargs, + (char *)(intptr_t)"hi", int, int), "hi" }; + +int main(int argc, char *argv[]) +{ + void *p = &dummy; + unsigned long l = (unsigned long)p; + char str[] = "hello world"; + + plan_tests(2 + 1 + 3 + 3); + set_some_value(p); + set_some_value(l); + + callback_onearg(my_callback_onearg, str); + + callback_preargs(my_callback_preargs, str); + + callback_postargs(my_callback_postargs, str); + + return exit_status(); +} diff --git a/lib/ccan/typesafe_cb/typesafe_cb.h b/lib/ccan/typesafe_cb/typesafe_cb.h new file mode 100644 index 0000000000..40cfa39798 --- /dev/null +++ b/lib/ccan/typesafe_cb/typesafe_cb.h @@ -0,0 +1,133 @@ +#ifndef CCAN_TYPESAFE_CB_H +#define CCAN_TYPESAFE_CB_H +#include "config.h" + +#if HAVE_TYPEOF && HAVE_BUILTIN_CHOOSE_EXPR && HAVE_BUILTIN_TYPES_COMPATIBLE_P +/** + * typesafe_cb_cast - only cast an expression if it matches a given type + * @desttype: the type to cast to + * @oktype: the type we allow + * @expr: the expression to cast + * + * This macro is used to create functions which allow multiple types. + * The result of this macro is used somewhere that a @desttype type is + * expected: if @expr is exactly of type @oktype, then it will be + * cast to @desttype type, otherwise left alone. + * + * This macro can be used in static initializers. 
+ * + * This is merely useful for warnings: if the compiler does not + * support the primitives required for typesafe_cb_cast(), it becomes an + * unconditional cast, and the @oktype argument is not used. In + * particular, this means that @oktype can be a type which uses the + * "typeof": it will not be evaluated if typeof is not supported. + * + * Example: + * // We can take either an unsigned long or a void *. + * void _set_some_value(void *val); + * #define set_some_value(e) \ + * _set_some_value(typesafe_cb_cast(void *, unsigned long, (e))) + */ +#define typesafe_cb_cast(desttype, oktype, expr) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(__typeof__(0?(expr):(expr)), \ + oktype), \ + (desttype)(expr), (expr)) +#else +#define typesafe_cb_cast(desttype, oktype, expr) ((desttype)(expr)) +#endif + +/** + * typesafe_cb_cast3 - only cast an expression if it matches given types + * @desttype: the type to cast to + * @ok1: the first type we allow + * @ok2: the second type we allow + * @ok3: the third type we allow + * @expr: the expression to cast + * + * This is a convenient wrapper for multiple typesafe_cb_cast() calls. + * You can chain them inside each other (i.e. use typesafe_cb_cast() + * for expr) if you need more than 3 arguments. + * + * Example: + * // We can take either a long, unsigned long, void * or a const void *. + * void _set_some_value(void *val); + * #define set_some_value(expr) \ + * _set_some_value(typesafe_cb_cast3(void *, \ + * long, unsigned long, const void *,\ + * (expr))) + */ +#define typesafe_cb_cast3(desttype, ok1, ok2, ok3, expr) \ + typesafe_cb_cast(desttype, ok1, \ + typesafe_cb_cast(desttype, ok2, \ + typesafe_cb_cast(desttype, ok3, \ + (expr)))) + +/** + * typesafe_cb - cast a callback function if it matches the arg + * @rtype: the return type of the callback function + * @atype: the (pointer) type which the callback function expects.
+ * @fn: the callback function to cast + * @arg: the (pointer) argument to hand to the callback function. + * + * If a callback function takes a single argument, this macro does + * appropriate casts to a function which takes a single atype argument if the + * callback provided matches the @arg. + * + * It is assumed that @arg is of pointer type: usually @arg is passed + * or assigned to a void * elsewhere anyway. + * + * Example: + * void _register_callback(void (*fn)(void *arg), void *arg); + * #define register_callback(fn, arg) \ + * _register_callback(typesafe_cb(void, void *, (fn), (arg)), (arg)) + */ +#define typesafe_cb(rtype, atype, fn, arg) \ + typesafe_cb_cast(rtype (*)(atype), \ + rtype (*)(__typeof__(arg)), \ + (fn)) + +/** + * typesafe_cb_preargs - cast a callback function if it matches the arg + * @rtype: the return type of the callback function + * @atype: the (pointer) type which the callback function expects. + * @fn: the callback function to cast + * @arg: the (pointer) argument to hand to the callback function. + * + * This is a version of typesafe_cb() for callbacks that take other arguments + * before the @arg. + * + * Example: + * void _register_callback(void (*fn)(int, void *arg), void *arg); + * #define register_callback(fn, arg) \ + * _register_callback(typesafe_cb_preargs(void, void *, (fn), \ + * (arg), int), \ + * (arg)) + */ +#define typesafe_cb_preargs(rtype, atype, fn, arg, ...) \ + typesafe_cb_cast(rtype (*)(__VA_ARGS__, atype), \ + rtype (*)(__VA_ARGS__, __typeof__(arg)), \ + (fn)) + +/** + * typesafe_cb_postargs - cast a callback function if it matches the arg + * @rtype: the return type of the callback function + * @atype: the (pointer) type which the callback function expects. + * @fn: the callback function to cast + * @arg: the (pointer) argument to hand to the callback function. + * + * This is a version of typesafe_cb() for callbacks that take other arguments + * after the @arg.
+ * + * Example: + * void _register_callback(void (*fn)(void *arg, int), void *arg); + * #define register_callback(fn, arg) \ + * _register_callback(typesafe_cb_postargs(void, void *, (fn), \ + * (arg), int), \ + * (arg)) + */ +#define typesafe_cb_postargs(rtype, atype, fn, arg, ...) \ + typesafe_cb_cast(rtype (*)(atype, __VA_ARGS__), \ + rtype (*)(__typeof__(arg), __VA_ARGS__), \ + (fn)) +#endif /* CCAN_TYPESAFE_CB_H */ diff --git a/lib/ccan/wscript b/lib/ccan/wscript new file mode 100644 index 0000000000..0543a4de07 --- /dev/null +++ b/lib/ccan/wscript @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +import Logs, sys + +def configure(conf): + # FIXME: if they don't have -Werror, these will all fail. But they + # probably will anyway... + conf.CHECK_CODE('int __attribute__((cold)) func(int x) { return x; }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_COLD') + conf.CHECK_CODE('int __attribute__((const)) func(int x) { return x; }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_CONST') + conf.CHECK_CODE('void __attribute__((noreturn)) func(int x) { exit(x); }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_NORETURN') + conf.CHECK_CODE('void __attribute__((format(__printf__, 1, 2))) func(const char *fmt, ...) { }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_PRINTF') + conf.CHECK_CODE('int __attribute__((unused)) func(int x) { return x; }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_UNUSED') + conf.CHECK_CODE('int __attribute__((used)) func(int x) { return x; }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_ATTRIBUTE_USED') + # We try to use headers for a compile-time test.
+ conf.CHECK_CODE(code = """#ifdef __BYTE_ORDER + #define B __BYTE_ORDER + #elif defined(BYTE_ORDER) + #define B BYTE_ORDER + #endif + + #ifdef __LITTLE_ENDIAN + #define LITTLE __LITTLE_ENDIAN + #elif defined(LITTLE_ENDIAN) + #define LITTLE LITTLE_ENDIAN + #endif + + #if !defined(LITTLE) || !defined(B) || LITTLE != B + #error Not little endian. + #endif""", + headers="endian.h sys/endian.h", + define="HAVE_LITTLE_ENDIAN") + conf.CHECK_CODE(code = """#ifdef __BYTE_ORDER + #define B __BYTE_ORDER + #elif defined(BYTE_ORDER) + #define B BYTE_ORDER + #endif + + #ifdef __BIG_ENDIAN + #define BIG __BIG_ENDIAN + #elif defined(BIG_ENDIAN) + #define BIG BIG_ENDIAN + #endif + + #if !defined(BIG) || !defined(B) || BIG != B + #error Not big endian. + #endif""", + headers="endian.h sys/endian.h", + define="HAVE_BIG_ENDIAN") + + if not conf.CONFIG_SET("HAVE_BIG_ENDIAN") and not conf.CONFIG_SET("HAVE_LITTLE_ENDIAN"): + # That didn't work! Do runtime test. + conf.CHECK_CODE("""union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x04 && u.c[1] == 0x03 && u.c[2] == 0x02 && u.c[3] == 0x01 ? 0 : 1;""", + addmain=True, execute=True, + define='HAVE_LITTLE_ENDIAN', + msg="Checking for HAVE_LITTLE_ENDIAN - runtime") + conf.CHECK_CODE("""union { int i; char c[sizeof(int)]; } u; + u.i = 0x01020304; + return u.c[0] == 0x01 && u.c[1] == 0x02 && u.c[2] == 0x03 && u.c[3] == 0x04 ? 0 : 1;""", + addmain=True, execute=True, + define='HAVE_BIG_ENDIAN', + msg="Checking for HAVE_BIG_ENDIAN - runtime") + + # Extra sanity check. + if conf.CONFIG_SET("HAVE_BIG_ENDIAN") == conf.CONFIG_SET("HAVE_LITTLE_ENDIAN"): + Logs.error("Failed endian determination. The PDP-11 is back?") + sys.exit(1) + + conf.CHECK_CODE('return __builtin_clz(1) == (sizeof(int)*8 - 1) ? 0 : 1;', + link=True, + define='HAVE_BUILTIN_CLZ') + conf.CHECK_CODE('return __builtin_clzl(1) == (sizeof(long)*8 - 1) ? 
0 : 1;', + link=True, + define='HAVE_BUILTIN_CLZL') + conf.CHECK_CODE('return __builtin_clzll(1) == (sizeof(long long)*8 - 1) ? 0 : 1;', + link=True, + define='HAVE_BUILTIN_CLZLL') + conf.CHECK_CODE('return __builtin_constant_p(1) ? 0 : 1;', + link=True, + define='HAVE_BUILTIN_CONSTANT_P') + conf.CHECK_CODE('return __builtin_expect(main != 0, 1) ? 0 : 1;', + link=True, + define='HAVE_BUILTIN_EXPECT') + conf.CHECK_CODE('return __builtin_popcountl(255L) == 8 ? 0 : 1;', + link=True, + define='HAVE_BUILTIN_POPCOUNTL') + conf.CHECK_CODE('return __builtin_types_compatible_p(char *, int) ? 1 : 0;', + link=True, + define='HAVE_BUILTIN_TYPES_COMPATIBLE_P') + conf.CHECK_CODE('int *foo = (int[]) { 1, 2, 3, 4 }; return foo[0] ? 0 : 1;', + define='HAVE_COMPOUND_LITERALS') + conf.CHECK_CODE("""#include <ctype.h> + int main(void) { return isblank(' ') ? 0 : 1; }""", + link=True, addmain=False, add_headers=False, + define='HAVE_ISBLANK') + conf.CHECK_CODE('int x = 1; __typeof__(x) i; i = x; return i == x ? 
0 : 1;', + link=True, + define='HAVE_TYPEOF') + conf.CHECK_CODE('int __attribute__((warn_unused_result)) func(int x) { return x; }', + addmain=False, link=False, cflags="-Werror", + define='HAVE_WARN_UNUSED_RESULT') + +def build(bld): + bld.SAMBA_LIBRARY('ccan', + vnum="0.1-init-1161-g661d41f", + source=bld.path.ant_glob('*/*.c'), + private_library=True) diff --git a/lib/nss_wrapper/nss_wrapper.c b/lib/nss_wrapper/nss_wrapper.c index cfa5a68712..8767fbfd89 100644 --- a/lib/nss_wrapper/nss_wrapper.c +++ b/lib/nss_wrapper/nss_wrapper.c @@ -36,7 +36,9 @@ /* defining this gives us the posix getpwnam_r() calls on solaris Thanks to heimdal for this */ +#ifndef _POSIX_PTHREAD_SEMANTICS #define _POSIX_PTHREAD_SEMANTICS +#endif #define NSS_WRAPPER_NOT_REPLACE #include "../replace/replace.h" diff --git a/lib/replace/libreplace_network.m4 b/lib/replace/libreplace_network.m4 index f9bca40ce9..eadcc6bfc1 100644 --- a/lib/replace/libreplace_network.m4 +++ b/lib/replace/libreplace_network.m4 @@ -240,12 +240,25 @@ if test x"$libreplace_cv_HAVE_GETADDRINFO" = x"yes"; then { struct addrinfo hints = {0,}; struct addrinfo *ppres; - const char hostname[] = "0.0.0.0"; + const char hostname1[] = "0.0.0.0"; + const char hostname2[] = "127.0.0.1"; + const char hostname3[] = "::"; hints.ai_socktype = SOCK_STREAM; - hints.ai_family = AF_INET; + hints.ai_family = AF_UNSPEC; hints.ai_flags = AI_NUMERICHOST|AI_PASSIVE|AI_ADDRCONFIG; - return getaddrinfo(hostname, NULL, &hints, &ppres) != 0 ? 1 : 0; + /* Test for broken flag combination on AIX. */ + if (getaddrinfo(hostname1, NULL, &hints, &ppres) == EAI_BADFLAGS) { + /* This fails on an IPv6-only box, but not with + the EAI_BADFLAGS error. */ + return 1; + } + if (getaddrinfo(hostname2, NULL, &hints, &ppres) == 0) { + /* IPv4 lookup works - good enough. */ + return 0; + } + /* Uh-oh, no IPv4. Are we IPv6-only ? */ + return getaddrinfo(hostname3, NULL, &hints, &ppres) != 0 ? 
1 : 0; }], libreplace_cv_HAVE_GETADDRINFO=yes, libreplace_cv_HAVE_GETADDRINFO=no) diff --git a/lib/replace/system/kerberos.h b/lib/replace/system/kerberos.h index bb1f1b9a09..7762d4be46 100644 --- a/lib/replace/system/kerberos.h +++ b/lib/replace/system/kerberos.h @@ -37,5 +37,19 @@ #include <com_err.h> #endif +#ifdef HAVE_GSSAPI_GSSAPI_EXT_H +#include <gssapi/gssapi_ext.h> +#elif HAVE_GSSAPI_GSSAPI_H +#include <gssapi/gssapi.h> +#elif HAVE_GSSAPI_GSSAPI_GENERIC_H +#include <gssapi/gssapi_generic.h> +#elif HAVE_GSSAPI_H +#include <gssapi.h> +#endif + +#if HAVE_GSSAPI_GSSAPI_KRB5_H +#include <gssapi/gssapi_krb5.h> +#endif + #endif #endif diff --git a/lib/replace/system/network.h b/lib/replace/system/network.h index f7c1bcfacb..a4e6a7e31a 100644 --- a/lib/replace/system/network.h +++ b/lib/replace/system/network.h @@ -331,8 +331,6 @@ typedef unsigned short int sa_family_t; * which might return 512 or bigger */ # define IOV_MAX 512 -# else -# error IOV_MAX and UIO_MAXIOV undefined # endif # endif #endif diff --git a/lib/smbconf/smbconf.c b/lib/smbconf/smbconf.c index 80fe9aac37..e0441ed985 100644 --- a/lib/smbconf/smbconf.c +++ b/lib/smbconf/smbconf.c @@ -27,12 +27,13 @@ * **********************************************************************/ -static WERROR smbconf_global_check(struct smbconf_ctx *ctx) +static sbcErr smbconf_global_check(struct smbconf_ctx *ctx) { if (!smbconf_share_exists(ctx, GLOBAL_NAME)) { return smbconf_create_share(ctx, GLOBAL_NAME); } - return WERR_OK; + + return SBC_ERR_OK; } @@ -42,6 +43,41 @@ static WERROR smbconf_global_check(struct smbconf_ctx *ctx) * **********************************************************************/ +const char *sbcErrorString(sbcErr error) +{ + switch (error) { + case SBC_ERR_OK: + return "SBC_ERR_OK"; + case SBC_ERR_NOT_IMPLEMENTED: + return "SBC_ERR_NOT_IMPLEMENTED"; + case SBC_ERR_NOT_SUPPORTED: + return "SBC_ERR_NOT_SUPPORTED"; + case SBC_ERR_UNKNOWN_FAILURE: + return "SBC_ERR_UNKNOWN_FAILURE"; + case 
SBC_ERR_NOMEM: + return "SBC_ERR_NOMEM"; + case SBC_ERR_INVALID_PARAM: + return "SBC_ERR_INVALID_PARAM"; + case SBC_ERR_BADFILE: + return "SBC_ERR_BADFILE"; + case SBC_ERR_NO_SUCH_SERVICE: + return "SBC_ERR_NO_SUCH_SERVICE"; + case SBC_ERR_IO_FAILURE: + return "SBC_ERR_IO_FAILURE"; + case SBC_ERR_CAN_NOT_COMPLETE: + return "SBC_ERR_CAN_NOT_COMPLETE"; + case SBC_ERR_NO_MORE_ITEMS: + return "SBC_ERR_NO_MORE_ITEMS"; + case SBC_ERR_FILE_EXISTS: + return "SBC_ERR_FILE_EXISTS"; + case SBC_ERR_ACCESS_DENIED: + return "SBC_ERR_ACCESS_DENIED"; + } + + return "unknown sbcErr value"; +} + + /** * Tell whether the backend requires messaging to be set up * for the backend to work correctly. @@ -91,7 +127,7 @@ bool smbconf_changed(struct smbconf_ctx *ctx, struct smbconf_csn *csn, /** * Drop the whole configuration (restarting empty). */ -WERROR smbconf_drop(struct smbconf_ctx *ctx) +sbcErr smbconf_drop(struct smbconf_ctx *ctx) { return ctx->ops->drop(ctx); } @@ -105,12 +141,12 @@ WERROR smbconf_drop(struct smbconf_ctx *ctx) * param_names : list of lists of parameter names for each share * param_values : list of lists of parameter values for each share */ -WERROR smbconf_get_config(struct smbconf_ctx *ctx, +sbcErr smbconf_get_config(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, struct smbconf_service ***services) { - WERROR werr = WERR_OK; + sbcErr err; TALLOC_CTX *tmp_ctx = NULL; uint32_t tmp_num_shares; char **tmp_share_names; @@ -118,36 +154,35 @@ WERROR smbconf_get_config(struct smbconf_ctx *ctx, uint32_t count; if ((num_shares == NULL) || (services == NULL)) { - werr = WERR_INVALID_PARAM; + err = SBC_ERR_INVALID_PARAM; goto done; } tmp_ctx = talloc_stackframe(); - werr = smbconf_get_share_names(ctx, tmp_ctx, &tmp_num_shares, - &tmp_share_names); - if (!W_ERROR_IS_OK(werr)) { + err = smbconf_get_share_names(ctx, tmp_ctx, &tmp_num_shares, + &tmp_share_names); + if (!SBC_ERROR_IS_OK(err)) { goto done; } tmp_services = talloc_array(tmp_ctx, struct 
smbconf_service *, tmp_num_shares); - if (tmp_services == NULL) { - werr = WERR_NOMEM; + err = SBC_ERR_NOMEM; goto done; } for (count = 0; count < tmp_num_shares; count++) { - werr = smbconf_get_share(ctx, tmp_services, - tmp_share_names[count], - &tmp_services[count]); - if (!W_ERROR_IS_OK(werr)) { + err = smbconf_get_share(ctx, tmp_services, + tmp_share_names[count], + &tmp_services[count]); + if (!SBC_ERROR_IS_OK(err)) { goto done; } } - werr = WERR_OK; + err = SBC_ERR_OK; *num_shares = tmp_num_shares; if (tmp_num_shares > 0) { @@ -158,13 +193,13 @@ WERROR smbconf_get_config(struct smbconf_ctx *ctx, done: talloc_free(tmp_ctx); - return werr; + return err; } /** * get the list of share names defined in the configuration. */ -WERROR smbconf_get_share_names(struct smbconf_ctx *ctx, +sbcErr smbconf_get_share_names(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, char ***share_names) @@ -185,11 +220,11 @@ bool smbconf_share_exists(struct smbconf_ctx *ctx, /** * Add a service if it does not already exist. */ -WERROR smbconf_create_share(struct smbconf_ctx *ctx, +sbcErr smbconf_create_share(struct smbconf_ctx *ctx, const char *servicename) { if ((servicename != NULL) && smbconf_share_exists(ctx, servicename)) { - return WERR_FILE_EXISTS; + return SBC_ERR_FILE_EXISTS; } return ctx->ops->create_share(ctx, servicename); @@ -198,7 +233,7 @@ WERROR smbconf_create_share(struct smbconf_ctx *ctx, /** * get a definition of a share (service) from configuration. 
*/ -WERROR smbconf_get_share(struct smbconf_ctx *ctx, +sbcErr smbconf_get_share(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *servicename, struct smbconf_service **service) @@ -209,10 +244,10 @@ WERROR smbconf_get_share(struct smbconf_ctx *ctx, /** * delete a service from configuration */ -WERROR smbconf_delete_share(struct smbconf_ctx *ctx, const char *servicename) +sbcErr smbconf_delete_share(struct smbconf_ctx *ctx, const char *servicename) { if (!smbconf_share_exists(ctx, servicename)) { - return WERR_NO_SUCH_SERVICE; + return SBC_ERR_NO_SUCH_SERVICE; } return ctx->ops->delete_share(ctx, servicename); @@ -221,7 +256,7 @@ WERROR smbconf_delete_share(struct smbconf_ctx *ctx, const char *servicename) /** * set a configuration parameter to the value provided. */ -WERROR smbconf_set_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_set_parameter(struct smbconf_ctx *ctx, const char *service, const char *param, const char *valstr) @@ -235,30 +270,31 @@ WERROR smbconf_set_parameter(struct smbconf_ctx *ctx, * * This also creates [global] when it does not exist. 
*/ -WERROR smbconf_set_global_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_set_global_parameter(struct smbconf_ctx *ctx, const char *param, const char *val) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_set_parameter(ctx, GLOBAL_NAME, param, val); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } + err = smbconf_set_parameter(ctx, GLOBAL_NAME, param, val); - return werr; + return err; } /** * get the value of a configuration parameter as a string */ -WERROR smbconf_get_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_get_parameter(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, const char *param, char **valstr) { if (valstr == NULL) { - return WERR_INVALID_PARAM; + return SBC_ERR_INVALID_PARAM; } return ctx->ops->get_parameter(ctx, mem_ctx, service, param, valstr); @@ -269,26 +305,28 @@ WERROR smbconf_get_parameter(struct smbconf_ctx *ctx, * * Create [global] if it does not exist. */ -WERROR smbconf_get_global_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_get_global_parameter(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *param, char **valstr) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_get_parameter(ctx, mem_ctx, GLOBAL_NAME, param, - valstr); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } - return werr; + err = smbconf_get_parameter(ctx, mem_ctx, GLOBAL_NAME, param, + valstr); + + return err; } /** * delete a parameter from configuration */ -WERROR smbconf_delete_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_delete_parameter(struct smbconf_ctx *ctx, const char *service, const char *param) { return ctx->ops->delete_parameter(ctx, service, param); @@ -299,20 +337,21 @@ WERROR smbconf_delete_parameter(struct smbconf_ctx *ctx, * * Create [global] if it does not exist. 
*/ -WERROR smbconf_delete_global_parameter(struct smbconf_ctx *ctx, +sbcErr smbconf_delete_global_parameter(struct smbconf_ctx *ctx, const char *param) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_delete_parameter(ctx, GLOBAL_NAME, param); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } + err = smbconf_delete_parameter(ctx, GLOBAL_NAME, param); - return werr; + return err; } -WERROR smbconf_get_includes(struct smbconf_ctx *ctx, +sbcErr smbconf_get_includes(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, uint32_t *num_includes, char ***includes) @@ -321,72 +360,75 @@ WERROR smbconf_get_includes(struct smbconf_ctx *ctx, includes); } -WERROR smbconf_get_global_includes(struct smbconf_ctx *ctx, +sbcErr smbconf_get_global_includes(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_includes, char ***includes) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_get_includes(ctx, mem_ctx, GLOBAL_NAME, - num_includes, includes); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } + err = smbconf_get_includes(ctx, mem_ctx, GLOBAL_NAME, + num_includes, includes); - return werr; + return err; } -WERROR smbconf_set_includes(struct smbconf_ctx *ctx, +sbcErr smbconf_set_includes(struct smbconf_ctx *ctx, const char *service, uint32_t num_includes, const char **includes) { return ctx->ops->set_includes(ctx, service, num_includes, includes); } -WERROR smbconf_set_global_includes(struct smbconf_ctx *ctx, +sbcErr smbconf_set_global_includes(struct smbconf_ctx *ctx, uint32_t num_includes, const char **includes) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_set_includes(ctx, GLOBAL_NAME, - num_includes, includes); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } + err = 
smbconf_set_includes(ctx, GLOBAL_NAME, + num_includes, includes); - return werr; + return err; } -WERROR smbconf_delete_includes(struct smbconf_ctx *ctx, const char *service) +sbcErr smbconf_delete_includes(struct smbconf_ctx *ctx, const char *service) { return ctx->ops->delete_includes(ctx, service); } -WERROR smbconf_delete_global_includes(struct smbconf_ctx *ctx) +sbcErr smbconf_delete_global_includes(struct smbconf_ctx *ctx) { - WERROR werr; + sbcErr err; - werr = smbconf_global_check(ctx); - if (W_ERROR_IS_OK(werr)) { - werr = smbconf_delete_includes(ctx, GLOBAL_NAME); + err = smbconf_global_check(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } + err = smbconf_delete_includes(ctx, GLOBAL_NAME); - return werr; + return err; } -WERROR smbconf_transaction_start(struct smbconf_ctx *ctx) +sbcErr smbconf_transaction_start(struct smbconf_ctx *ctx) { return ctx->ops->transaction_start(ctx); } -WERROR smbconf_transaction_commit(struct smbconf_ctx *ctx) +sbcErr smbconf_transaction_commit(struct smbconf_ctx *ctx) { return ctx->ops->transaction_commit(ctx); } -WERROR smbconf_transaction_cancel(struct smbconf_ctx *ctx) +sbcErr smbconf_transaction_cancel(struct smbconf_ctx *ctx) { return ctx->ops->transaction_cancel(ctx); } diff --git a/lib/smbconf/smbconf.h b/lib/smbconf/smbconf.h index 517302ac88..7f62b06af4 100644 --- a/lib/smbconf/smbconf.h +++ b/lib/smbconf/smbconf.h @@ -20,6 +20,39 @@ #ifndef __LIBSMBCONF_H__ #define __LIBSMBCONF_H__ +/** + * @defgroup libsmbconf The smbconf API + * + * libsmbconf is a library to read or, based on the backend, modify the Samba + * configuration. 
+ * + * @{ + */ + +/** + * @brief Status codes returned from smbconf functions + */ +enum _sbcErrType { + SBC_ERR_OK = 0, /**< Successful completion **/ + SBC_ERR_NOT_IMPLEMENTED, /**< Function not implemented **/ + SBC_ERR_NOT_SUPPORTED, /**< Function not supported **/ + SBC_ERR_UNKNOWN_FAILURE, /**< General failure **/ + SBC_ERR_NOMEM, /**< Memory allocation error **/ + SBC_ERR_INVALID_PARAM, /**< An Invalid parameter was supplied **/ + SBC_ERR_BADFILE, /**< A bad file was supplied **/ + SBC_ERR_NO_SUCH_SERVICE, /**< There is no such service provided **/ + SBC_ERR_IO_FAILURE, /**< There was an IO error **/ + SBC_ERR_CAN_NOT_COMPLETE,/**< Can not complete action **/ + SBC_ERR_NO_MORE_ITEMS, /**< No more items left **/ + SBC_ERR_FILE_EXISTS, /**< File already exists **/ + SBC_ERR_ACCESS_DENIED, /**< Access has been denied **/ +}; + +typedef enum _sbcErrType sbcErr; + +#define SBC_ERROR_IS_OK(x) ((x) == SBC_ERR_OK) +#define SBC_ERROR_EQUAL(x,y) ((x) == (y)) + struct smbconf_ctx; /* the change sequence number */ @@ -27,75 +60,428 @@ struct smbconf_csn { uint64_t csn; }; +/** Information about a service */ struct smbconf_service { - char *name; - uint32_t num_params; - char **param_names; - char **param_values; + char *name; /**< The name of the share */ + uint32_t num_params; /**< Number of parameters in this share */ + char **param_names; /**< List of parameter names of this share */ + char **param_values; /**< List of parameter values of this share */ }; /* - * the smbconf API functions + * The smbconf API functions + */ + +/** + * @brief Translate an error value into a string + * + * @param error + * + * @return a pointer to a static string + **/ +const char *sbcErrorString(sbcErr error); + +/** + * @brief Check if the backend requires messaging to be set up. + * + * Tell whether the backend requires messaging to be set up + * for the backend to work correctly. + * + * @param[in] ctx The smbconf context to check.
+ * + * @return True if needed, false if not. */ bool smbconf_backend_requires_messaging(struct smbconf_ctx *ctx); + +/** + * @brief Tell whether the source is writeable. + * + * @param[in] ctx The smbconf context to check. + * + * @return True if it is writeable, false if not. + */ bool smbconf_is_writeable(struct smbconf_ctx *ctx); + +/** + * @brief Close the configuration. + * + * @param[in] ctx The smbconf context to close. + */ void smbconf_shutdown(struct smbconf_ctx *ctx); + +/** + * @brief Detect changes in the configuration. + * + * Get the change sequence number of the given service/parameter. Service and + * parameter strings may be NULL. + * + * The given change sequence number (csn) struct is filled with the current + * csn. smbconf_changed() can also be used for initial retrieval of the csn. + * + * @param[in] ctx The smbconf context to check for changes. + * + * @param[in,out] csn The smbconf csn to be filled. + * + * @param[in] service The service name to check or NULL. + * + * @param[in] param The param to check or NULL. + * + * @return True if it has been changed, false if not. + */ bool smbconf_changed(struct smbconf_ctx *ctx, struct smbconf_csn *csn, const char *service, const char *param); -WERROR smbconf_drop(struct smbconf_ctx *ctx); -WERROR smbconf_get_config(struct smbconf_ctx *ctx, + +/** + * @brief Drop the whole configuration (restarting empty). + * + * @param[in] ctx The smbconf context to drop the config. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_drop(struct smbconf_ctx *ctx); + +/** + * @brief Get the whole configuration as lists of strings with counts. + * + * @param[in] ctx The smbconf context to get the lists from. + * + * @param[in] mem_ctx The memory context to use. + * + * @param[out] num_shares A pointer to store the number of shares. + * + * @param[out] services A pointer to store the services.
+ * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + * + * @see smbconf_service + */ +sbcErr smbconf_get_config(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, struct smbconf_service ***services); -WERROR smbconf_get_share_names(struct smbconf_ctx *ctx, + +/** + * @brief Get the list of share names defined in the configuration. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx The memory context to use. + * + * @param[out] num_shares A pointer to store the number of shares. + * + * @param[out] share_names A pointer to store the share names. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_get_share_names(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, char ***share_names); + +/** + * @brief Check if a share/service of a given name exists. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] servicename The service name to check if it exists. + * + * @return True if it exists, false if not. + */ bool smbconf_share_exists(struct smbconf_ctx *ctx, const char *servicename); -WERROR smbconf_create_share(struct smbconf_ctx *ctx, const char *servicename); -WERROR smbconf_get_share(struct smbconf_ctx *ctx, + +/** + * @brief Add a service if it does not already exist. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] servicename The name of the service to add. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_create_share(struct smbconf_ctx *ctx, const char *servicename); + +/** + * @brief Get a definition of a share (service) from configuration. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx A memory context to allocate the result. + * + * @param[in] servicename The service name to get the information from.
+ * + * @param[out] service A pointer to store the service information about the + * share. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + * + * @see smbconf_service + */ +sbcErr smbconf_get_share(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *servicename, struct smbconf_service **service); -WERROR smbconf_delete_share(struct smbconf_ctx *ctx, + +/** + * @brief Delete a service from configuration. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] servicename The service name to delete. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_delete_share(struct smbconf_ctx *ctx, const char *servicename); -WERROR smbconf_set_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Set a configuration parameter to the value provided. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] service The service name to set the parameter. + * + * @param[in] param The name of the parameter to set. + * + * @param[in] valstr The value to set. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_set_parameter(struct smbconf_ctx *ctx, const char *service, const char *param, const char *valstr); -WERROR smbconf_set_global_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Set a global configuration parameter to the value provided. + * + * This adds a parameter in the [global] service. It also creates [global] if it + * doesn't exist. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] param The name of the parameter to set. + * + * @param[in] val The value to set. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred.
+ */ +sbcErr smbconf_set_global_parameter(struct smbconf_ctx *ctx, const char *param, const char *val); -WERROR smbconf_get_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Get the value of a configuration parameter as a string. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx The memory context to allocate the string on. + * + * @param[in] service The name of the service where to find the parameter. + * + * @param[in] param The parameter to get. + * + * @param[out] valstr A pointer to store the value as a string. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_get_parameter(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, const char *param, char **valstr); -WERROR smbconf_get_global_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Get the value of a global configuration parameter as a string. + * + * It also creates [global] if it doesn't exist. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx The memory context to allocate the string on. + * + * @param[in] param The parameter to get. + * + * @param[out] valstr A pointer to store the value as a string. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_get_global_parameter(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *param, char **valstr); -WERROR smbconf_delete_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Delete a parameter from the configuration. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] service The service where the parameter can be found. + * + * @param[in] param The name of the parameter to delete. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. 
+ */ +sbcErr smbconf_delete_parameter(struct smbconf_ctx *ctx, const char *service, const char *param); -WERROR smbconf_delete_global_parameter(struct smbconf_ctx *ctx, + +/** + * @brief Delete a global parameter from the configuration. + * + * It also creates [global] if it doesn't exist. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] param The name of the parameter to delete. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_delete_global_parameter(struct smbconf_ctx *ctx, const char *param); -WERROR smbconf_get_includes(struct smbconf_ctx *ctx, + +/** + * @brief Get the list of names of included files. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx The memory context to allocate the names. + * + * @param[in] service The service name to get the include files. + * + * @param[out] num_includes A pointer to store the number of included files. + * + * @param[out] includes A pointer to store the paths of the included files. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_get_includes(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, uint32_t *num_includes, char ***includes); -WERROR smbconf_get_global_includes(struct smbconf_ctx *ctx, + +/** + * @brief Get the list of globally included files. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] mem_ctx The memory context to allocate the names. + * + * @param[out] num_includes A pointer to store the number of included files. + * + * @param[out] includes A pointer to store the paths of the included files. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. 
+ */ +sbcErr smbconf_get_global_includes(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_includes, char ***includes); -WERROR smbconf_set_includes(struct smbconf_ctx *ctx, + +/** + * @brief Set a list of config files to include on the given service. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] service The service to add includes. + * + * @param[in] num_includes The number of includes to set. + * + * @param[in] includes A list of paths to include. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_set_includes(struct smbconf_ctx *ctx, const char *service, uint32_t num_includes, const char **includes); -WERROR smbconf_set_global_includes(struct smbconf_ctx *ctx, + +/** + * @brief Set a list of config files to include globally. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] num_includes The number of includes to set. + * + * @param[in] includes A list of paths to include. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_set_global_includes(struct smbconf_ctx *ctx, uint32_t num_includes, const char **includes); -WERROR smbconf_delete_includes(struct smbconf_ctx *ctx, const char *service); -WERROR smbconf_delete_global_includes(struct smbconf_ctx *ctx); -WERROR smbconf_transaction_start(struct smbconf_ctx *ctx); -WERROR smbconf_transaction_commit(struct smbconf_ctx *ctx); -WERROR smbconf_transaction_cancel(struct smbconf_ctx *ctx); +/** + * @brief Delete include parameter on the given service. + * + * @param[in] ctx The smbconf context to use. + * + * @param[in] service The name of the service to delete the includes from. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_delete_includes(struct smbconf_ctx *ctx, const char *service); + +/** + * @brief Delete include parameter from the global service. 
+ * + * @param[in] ctx The smbconf context to use. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_delete_global_includes(struct smbconf_ctx *ctx); + +/** + * @brief Start a transaction on the configuration backend. + * + * This is to speed up writes to the registry based backend. + * + * @param[in] ctx The smbconf context to start the transaction. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + */ +sbcErr smbconf_transaction_start(struct smbconf_ctx *ctx); + +/** + * @brief Commit a transaction on the configuration backend. + * + * This is to speed up writes to the registry based backend. + * + * @param[in] ctx The smbconf context to commit the transaction. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. + * + * @see smbconf_transaction_start() + */ +sbcErr smbconf_transaction_commit(struct smbconf_ctx *ctx); + +/** + * @brief Cancel a transaction on the configuration backend. + * + * @param[in] ctx The smbconf context to cancel the transaction. + * + * @return SBC_ERR_OK on success, a corresponding sbcErr if an + * error occurred. 
+ * + * @see smbconf_transaction_start() + */ +sbcErr smbconf_transaction_cancel(struct smbconf_ctx *ctx); + +/* @} ******************************************************************/ #endif /* _LIBSMBCONF_H_ */ diff --git a/lib/smbconf/smbconf_private.h b/lib/smbconf/smbconf_private.h index e6998ad639..e768c30b91 100644 --- a/lib/smbconf/smbconf_private.h +++ b/lib/smbconf/smbconf_private.h @@ -27,50 +27,50 @@ #include "lib/smbconf/smbconf.h" struct smbconf_ops { - WERROR (*init)(struct smbconf_ctx *ctx, const char *path); + sbcErr (*init)(struct smbconf_ctx *ctx, const char *path); int (*shutdown)(struct smbconf_ctx *ctx); bool (*requires_messaging)(struct smbconf_ctx *ctx); bool (*is_writeable)(struct smbconf_ctx *ctx); - WERROR (*open_conf)(struct smbconf_ctx *ctx); + sbcErr (*open_conf)(struct smbconf_ctx *ctx); int (*close_conf)(struct smbconf_ctx *ctx); void (*get_csn)(struct smbconf_ctx *ctx, struct smbconf_csn *csn, const char *service, const char *param); - WERROR (*drop)(struct smbconf_ctx *ctx); - WERROR (*get_share_names)(struct smbconf_ctx *ctx, + sbcErr (*drop)(struct smbconf_ctx *ctx); + sbcErr (*get_share_names)(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, char ***share_names); bool (*share_exists)(struct smbconf_ctx *ctx, const char *service); - WERROR (*create_share)(struct smbconf_ctx *ctx, const char *service); - WERROR (*get_share)(struct smbconf_ctx *ctx, + sbcErr (*create_share)(struct smbconf_ctx *ctx, const char *service); + sbcErr (*get_share)(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *servicename, struct smbconf_service **service); - WERROR (*delete_share)(struct smbconf_ctx *ctx, + sbcErr (*delete_share)(struct smbconf_ctx *ctx, const char *servicename); - WERROR (*set_parameter)(struct smbconf_ctx *ctx, + sbcErr (*set_parameter)(struct smbconf_ctx *ctx, const char *service, const char *param, const char *valstr); - WERROR (*get_parameter)(struct smbconf_ctx *ctx, + sbcErr (*get_parameter)(struct 
smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, const char *param, char **valstr); - WERROR (*delete_parameter)(struct smbconf_ctx *ctx, + sbcErr (*delete_parameter)(struct smbconf_ctx *ctx, const char *service, const char *param); - WERROR (*get_includes)(struct smbconf_ctx *ctx, + sbcErr (*get_includes)(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, uint32_t *num_includes, char ***includes); - WERROR (*set_includes)(struct smbconf_ctx *ctx, + sbcErr (*set_includes)(struct smbconf_ctx *ctx, const char *service, uint32_t num_includes, const char **includes); - WERROR (*delete_includes)(struct smbconf_ctx *ctx, + sbcErr (*delete_includes)(struct smbconf_ctx *ctx, const char *service); - WERROR (*transaction_start)(struct smbconf_ctx *ctx); - WERROR (*transaction_commit)(struct smbconf_ctx *ctx); - WERROR (*transaction_cancel)(struct smbconf_ctx *ctx); + sbcErr (*transaction_start)(struct smbconf_ctx *ctx); + sbcErr (*transaction_commit)(struct smbconf_ctx *ctx); + sbcErr (*transaction_cancel)(struct smbconf_ctx *ctx); }; struct smbconf_ctx { @@ -79,10 +79,10 @@ struct smbconf_ctx { void *data; /* private data for use in backends */ }; -WERROR smbconf_init_internal(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, +sbcErr smbconf_init_internal(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, const char *path, struct smbconf_ops *ops); -WERROR smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, +sbcErr smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, char ***array, uint32_t count, const char *string); diff --git a/lib/smbconf/smbconf_txt.c b/lib/smbconf/smbconf_txt.c index 2114841b81..5c4bd27b9d 100644 --- a/lib/smbconf/smbconf_txt.c +++ b/lib/smbconf/smbconf_txt.c @@ -60,7 +60,7 @@ static struct txt_private_data *pd(struct smbconf_ctx *ctx) static bool smbconf_txt_do_section(const char *section, void *private_data) { - WERROR werr; + sbcErr err; uint32_t idx; struct txt_private_data *tpd = (struct txt_private_data *)private_data; 
struct txt_cache *cache = tpd->cache; @@ -72,9 +72,9 @@ static bool smbconf_txt_do_section(const char *section, void *private_data) return true; } - werr = smbconf_add_string_to_array(cache, &(cache->share_names), - cache->num_shares, section); - if (!W_ERROR_IS_OK(werr)) { + err = smbconf_add_string_to_array(cache, &(cache->share_names), + cache->num_shares, section); + if (!SBC_ERROR_IS_OK(err)) { return false; } cache->current_share = cache->num_shares; @@ -114,7 +114,7 @@ static bool smbconf_txt_do_parameter(const char *param_name, const char *param_value, void *private_data) { - WERROR werr; + sbcErr err; char **param_names, **param_values; uint32_t num_params; uint32_t idx; @@ -146,17 +146,17 @@ static bool smbconf_txt_do_parameter(const char *param_name, } return true; } - werr = smbconf_add_string_to_array(cache, + err = smbconf_add_string_to_array(cache, &(cache->param_names[cache->current_share]), num_params, param_name); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { return false; } - werr = smbconf_add_string_to_array(cache, + err = smbconf_add_string_to_array(cache, &(cache->param_values[cache->current_share]), num_params, param_value); cache->num_params[cache->current_share]++; - return W_ERROR_IS_OK(werr); + return SBC_ERROR_IS_OK(err); } static void smbconf_txt_flush_cache(struct smbconf_ctx *ctx) @@ -165,7 +165,7 @@ static void smbconf_txt_flush_cache(struct smbconf_ctx *ctx) pd(ctx)->cache = NULL; } -static WERROR smbconf_txt_init_cache(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_init_cache(struct smbconf_ctx *ctx) { if (pd(ctx)->cache != NULL) { smbconf_txt_flush_cache(ctx); @@ -174,40 +174,40 @@ static WERROR smbconf_txt_init_cache(struct smbconf_ctx *ctx) pd(ctx)->cache = talloc_zero(pd(ctx), struct txt_cache); if (pd(ctx)->cache == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } - return WERR_OK; + return SBC_ERR_OK; } -static WERROR smbconf_txt_load_file(struct smbconf_ctx *ctx) +static sbcErr 
smbconf_txt_load_file(struct smbconf_ctx *ctx) { - WERROR werr; + sbcErr err; uint64_t new_csn; if (!file_exist(ctx->path)) { - return WERR_BADFILE; + return SBC_ERR_BADFILE; } new_csn = (uint64_t)file_modtime(ctx->path); if (new_csn == pd(ctx)->csn) { - return WERR_OK; + return SBC_ERR_OK; } - werr = smbconf_txt_init_cache(ctx); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_txt_init_cache(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } if (!pm_process(ctx->path, smbconf_txt_do_section, smbconf_txt_do_parameter, pd(ctx))) { - return WERR_CAN_NOT_COMPLETE; + return SBC_ERR_CAN_NOT_COMPLETE; } pd(ctx)->csn = new_csn; - return WERR_OK; + return SBC_ERR_OK; } @@ -220,24 +220,24 @@ static WERROR smbconf_txt_load_file(struct smbconf_ctx *ctx) /** * initialize the text based smbconf backend */ -static WERROR smbconf_txt_init(struct smbconf_ctx *ctx, const char *path) +static sbcErr smbconf_txt_init(struct smbconf_ctx *ctx, const char *path) { if (path == NULL) { - return WERR_BADFILE; + return SBC_ERR_BADFILE; } ctx->path = talloc_strdup(ctx, path); if (ctx->path == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } ctx->data = talloc_zero(ctx, struct txt_private_data); if (ctx->data == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } pd(ctx)->verbatim = true; - return WERR_OK; + return SBC_ERR_OK; } static int smbconf_txt_shutdown(struct smbconf_ctx *ctx) @@ -256,7 +256,7 @@ static bool smbconf_txt_is_writeable(struct smbconf_ctx *ctx) return false; } -static WERROR smbconf_txt_open(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_open(struct smbconf_ctx *ctx) { return smbconf_txt_load_file(ctx); } @@ -285,15 +285,15 @@ static void smbconf_txt_get_csn(struct smbconf_ctx *ctx, /** * Drop the whole configuration (restarting empty) */ -static WERROR smbconf_txt_drop(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_drop(struct smbconf_ctx *ctx) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } /** * get the list of share 
names defined in the configuration. */ -static WERROR smbconf_txt_get_share_names(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_get_share_names(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, uint32_t *num_shares, char ***share_names) @@ -301,17 +301,16 @@ static WERROR smbconf_txt_get_share_names(struct smbconf_ctx *ctx, uint32_t count; uint32_t added_count = 0; TALLOC_CTX *tmp_ctx = NULL; - WERROR werr = WERR_OK; + sbcErr err = SBC_ERR_OK; char **tmp_share_names = NULL; if ((num_shares == NULL) || (share_names == NULL)) { - werr = WERR_INVALID_PARAM; - goto done; + return SBC_ERR_INVALID_PARAM; } - werr = smbconf_txt_load_file(ctx); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_txt_load_file(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } tmp_ctx = talloc_stackframe(); @@ -320,18 +319,18 @@ static WERROR smbconf_txt_get_share_names(struct smbconf_ctx *ctx, * possibly after NULL section */ if (smbconf_share_exists(ctx, NULL)) { - werr = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, - 0, NULL); - if (!W_ERROR_IS_OK(werr)) { + err = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, + 0, NULL); + if (!SBC_ERROR_IS_OK(err)) { goto done; } added_count++; } if (smbconf_share_exists(ctx, GLOBAL_NAME)) { - werr = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, + err = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, added_count, GLOBAL_NAME); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { goto done; } added_count++; @@ -344,10 +343,10 @@ static WERROR smbconf_txt_get_share_names(struct smbconf_ctx *ctx, continue; } - werr = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, + err = smbconf_add_string_to_array(tmp_ctx, &tmp_share_names, added_count, pd(ctx)->cache->share_names[count]); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { goto done; } added_count++; @@ -362,7 +361,7 @@ static WERROR smbconf_txt_get_share_names(struct smbconf_ctx *ctx, done: talloc_free(tmp_ctx); - return werr; + 
return err; } /** @@ -371,10 +370,10 @@ done: static bool smbconf_txt_share_exists(struct smbconf_ctx *ctx, const char *servicename) { - WERROR werr; + sbcErr err; - werr = smbconf_txt_load_file(ctx); - if (!W_ERROR_IS_OK(werr)) { + err = smbconf_txt_load_file(ctx); + if (!SBC_ERROR_IS_OK(err)) { return false; } @@ -386,29 +385,29 @@ static bool smbconf_txt_share_exists(struct smbconf_ctx *ctx, /** * Add a service if it does not already exist */ -static WERROR smbconf_txt_create_share(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_create_share(struct smbconf_ctx *ctx, const char *servicename) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } /** * get a definition of a share (service) from configuration. */ -static WERROR smbconf_txt_get_share(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_get_share(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *servicename, struct smbconf_service **service) { - WERROR werr; + sbcErr err; uint32_t sidx, count; bool found; TALLOC_CTX *tmp_ctx = NULL; struct smbconf_service *tmp_service = NULL; - werr = smbconf_txt_load_file(ctx); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_txt_load_file(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } found = smbconf_find_in_array(servicename, @@ -416,38 +415,38 @@ static WERROR smbconf_txt_get_share(struct smbconf_ctx *ctx, pd(ctx)->cache->num_shares, &sidx); if (!found) { - return WERR_NO_SUCH_SERVICE; + return SBC_ERR_NO_SUCH_SERVICE; } tmp_ctx = talloc_stackframe(); tmp_service = talloc_zero(tmp_ctx, struct smbconf_service); if (tmp_service == NULL) { - werr = WERR_NOMEM; + err = SBC_ERR_NOMEM; goto done; } if (servicename != NULL) { tmp_service->name = talloc_strdup(tmp_service, servicename); if (tmp_service->name == NULL) { - werr = WERR_NOMEM; + err = SBC_ERR_NOMEM; goto done; } } for (count = 0; count < pd(ctx)->cache->num_params[sidx]; count++) { - werr = smbconf_add_string_to_array(tmp_service, + err = 
smbconf_add_string_to_array(tmp_service, &(tmp_service->param_names), count, pd(ctx)->cache->param_names[sidx][count]); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { goto done; } - werr = smbconf_add_string_to_array(tmp_service, + err = smbconf_add_string_to_array(tmp_service, &(tmp_service->param_values), count, pd(ctx)->cache->param_values[sidx][count]); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { goto done; } } @@ -457,45 +456,45 @@ static WERROR smbconf_txt_get_share(struct smbconf_ctx *ctx, done: talloc_free(tmp_ctx); - return werr; + return err; } /** * delete a service from configuration */ -static WERROR smbconf_txt_delete_share(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_delete_share(struct smbconf_ctx *ctx, const char *servicename) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } /** * set a configuration parameter to the value provided. */ -static WERROR smbconf_txt_set_parameter(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_set_parameter(struct smbconf_ctx *ctx, const char *service, const char *param, const char *valstr) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } /** * get the value of a configuration parameter as a string */ -static WERROR smbconf_txt_get_parameter(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_get_parameter(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, const char *param, char **valstr) { - WERROR werr; + sbcErr err; bool found; uint32_t share_index, param_index; - werr = smbconf_txt_load_file(ctx); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_txt_load_file(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } found = smbconf_find_in_array(service, @@ -503,7 +502,7 @@ static WERROR smbconf_txt_get_parameter(struct smbconf_ctx *ctx, pd(ctx)->cache->num_shares, &share_index); if (!found) { - return WERR_NO_SUCH_SERVICE; + return SBC_ERR_NO_SUCH_SERVICE; } found = smbconf_reverse_find_in_array(param, @@ -511,45 
+510,45 @@ static WERROR smbconf_txt_get_parameter(struct smbconf_ctx *ctx, pd(ctx)->cache->num_params[share_index], &param_index); if (!found) { - return WERR_INVALID_PARAM; + return SBC_ERR_INVALID_PARAM; } *valstr = talloc_strdup(mem_ctx, pd(ctx)->cache->param_values[share_index][param_index]); if (*valstr == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } - return WERR_OK; + return SBC_ERR_OK; } /** * delete a parameter from configuration */ -static WERROR smbconf_txt_delete_parameter(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_delete_parameter(struct smbconf_ctx *ctx, const char *service, const char *param) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } -static WERROR smbconf_txt_get_includes(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_get_includes(struct smbconf_ctx *ctx, TALLOC_CTX *mem_ctx, const char *service, uint32_t *num_includes, char ***includes) { - WERROR werr; + sbcErr err; bool found; uint32_t sidx, count; TALLOC_CTX *tmp_ctx = NULL; uint32_t tmp_num_includes = 0; char **tmp_includes = NULL; - werr = smbconf_txt_load_file(ctx); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_txt_load_file(ctx); + if (!SBC_ERROR_IS_OK(err)) { + return err; } found = smbconf_find_in_array(service, @@ -557,7 +556,7 @@ static WERROR smbconf_txt_get_includes(struct smbconf_ctx *ctx, pd(ctx)->cache->num_shares, &sidx); if (!found) { - return WERR_NO_SUCH_SERVICE; + return SBC_ERR_NO_SUCH_SERVICE; } tmp_ctx = talloc_stackframe(); @@ -566,11 +565,11 @@ static WERROR smbconf_txt_get_includes(struct smbconf_ctx *ctx, if (strequal(pd(ctx)->cache->param_names[sidx][count], "include")) { - werr = smbconf_add_string_to_array(tmp_ctx, + err = smbconf_add_string_to_array(tmp_ctx, &tmp_includes, tmp_num_includes, pd(ctx)->cache->param_values[sidx][count]); - if (!W_ERROR_IS_OK(werr)) { + if (!SBC_ERROR_IS_OK(err)) { goto done; } tmp_num_includes++; @@ -581,47 +580,47 @@ static WERROR smbconf_txt_get_includes(struct smbconf_ctx 
*ctx, if (*num_includes > 0) { *includes = talloc_move(mem_ctx, &tmp_includes); if (*includes == NULL) { - werr = WERR_NOMEM; + err = SBC_ERR_NOMEM; goto done; } } else { *includes = NULL; } - werr = WERR_OK; + err = SBC_ERR_OK; done: talloc_free(tmp_ctx); - return werr; + return err; } -static WERROR smbconf_txt_set_includes(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_set_includes(struct smbconf_ctx *ctx, const char *service, uint32_t num_includes, const char **includes) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } -static WERROR smbconf_txt_delete_includes(struct smbconf_ctx *ctx, +static sbcErr smbconf_txt_delete_includes(struct smbconf_ctx *ctx, const char *service) { - return WERR_NOT_SUPPORTED; + return SBC_ERR_NOT_SUPPORTED; } -static WERROR smbconf_txt_transaction_start(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_transaction_start(struct smbconf_ctx *ctx) { - return WERR_OK; + return SBC_ERR_OK; } -static WERROR smbconf_txt_transaction_commit(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_transaction_commit(struct smbconf_ctx *ctx) { - return WERR_OK; + return SBC_ERR_OK; } -static WERROR smbconf_txt_transaction_cancel(struct smbconf_ctx *ctx) +static sbcErr smbconf_txt_transaction_cancel(struct smbconf_ctx *ctx) { - return WERR_OK; + return SBC_ERR_OK; } static struct smbconf_ops smbconf_ops_txt = { @@ -654,15 +653,15 @@ static struct smbconf_ops smbconf_ops_txt = { * initialize the smbconf text backend * the only function that is exported from this module */ -WERROR smbconf_init_txt(TALLOC_CTX *mem_ctx, +sbcErr smbconf_init_txt(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, const char *path) { - WERROR werr; + sbcErr err; - werr = smbconf_init_internal(mem_ctx, conf_ctx, path, &smbconf_ops_txt); - if (!W_ERROR_IS_OK(werr)) { - return werr; + err = smbconf_init_internal(mem_ctx, conf_ctx, path, &smbconf_ops_txt); + if (!SBC_ERROR_IS_OK(err)) { + return err; } return smbconf_txt_load_file(*conf_ctx); diff --git 
a/lib/smbconf/smbconf_txt.h b/lib/smbconf/smbconf_txt.h index 688bbc9d48..72d6207521 100644 --- a/lib/smbconf/smbconf_txt.h +++ b/lib/smbconf/smbconf_txt.h @@ -26,7 +26,7 @@ struct smbconf_ctx; * initialization functions for the text/file backend modules */ -WERROR smbconf_init_txt(TALLOC_CTX *mem_ctx, +sbcErr smbconf_init_txt(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, const char *path); diff --git a/lib/smbconf/smbconf_util.c b/lib/smbconf/smbconf_util.c index b309a3454b..86a95988f1 100644 --- a/lib/smbconf/smbconf_util.c +++ b/lib/smbconf/smbconf_util.c @@ -39,43 +39,43 @@ static int smbconf_destroy_ctx(struct smbconf_ctx *ctx) * After the work with the configuration is completed, smbconf_shutdown() * should be called. */ -WERROR smbconf_init_internal(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, +sbcErr smbconf_init_internal(TALLOC_CTX *mem_ctx, struct smbconf_ctx **conf_ctx, const char *path, struct smbconf_ops *ops) { - WERROR werr = WERR_OK; + sbcErr err = SBC_ERR_OK; struct smbconf_ctx *ctx; if (conf_ctx == NULL) { - return WERR_INVALID_PARAM; + return SBC_ERR_INVALID_PARAM; } ctx = talloc_zero(mem_ctx, struct smbconf_ctx); if (ctx == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } ctx->ops = ops; - werr = ctx->ops->init(ctx, path); - if (!W_ERROR_IS_OK(werr)) { + err = ctx->ops->init(ctx, path); + if (!SBC_ERROR_IS_OK(err)) { goto fail; } talloc_set_destructor(ctx, smbconf_destroy_ctx); *conf_ctx = ctx; - return werr; + return err; fail: talloc_free(ctx); - return werr; + return err; } /** * add a string to a talloced array of strings. 
*/ -WERROR smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, +sbcErr smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, char ***array, uint32_t count, const char *string) @@ -83,12 +83,12 @@ WERROR smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, char **new_array = NULL; if (array == NULL) { - return WERR_INVALID_PARAM; + return SBC_ERR_INVALID_PARAM; } new_array = talloc_realloc(mem_ctx, *array, char *, count + 1); if (new_array == NULL) { - return WERR_NOMEM; + return SBC_ERR_NOMEM; } if (string == NULL) { @@ -97,13 +97,13 @@ WERROR smbconf_add_string_to_array(TALLOC_CTX *mem_ctx, new_array[count] = talloc_strdup(new_array, string); if (new_array[count] == NULL) { talloc_free(new_array); - return WERR_NOMEM; + return SBC_ERR_NOMEM; } } *array = new_array; - return WERR_OK; + return SBC_ERR_OK; } bool smbconf_find_in_array(const char *string, char **list, diff --git a/lib/socket/interfaces.c b/lib/socket/interfaces.c new file mode 100644 index 0000000000..618714d1a7 --- /dev/null +++ b/lib/socket/interfaces.c @@ -0,0 +1,303 @@ +/* + Unix SMB/CIFS implementation. + return a list of network interfaces + Copyright (C) Andrew Tridgell 1998 + Copyright (C) Jeremy Allison 2007 + Copyright (C) Jelmer Vernooij 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + + +#include "includes.h" +#include "system/network.h" +#include "interfaces.h" +#include "lib/util/tsort.h" + +/**************************************************************************** + Create a struct sockaddr_storage with the netmask bits set to 1. +****************************************************************************/ + +bool make_netmask(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + unsigned long masklen) +{ + *pss_out = *pss_in; + /* Now apply masklen bits of mask. */ +#if defined(HAVE_IPV6) + if (pss_in->ss_family == AF_INET6) { + char *p = (char *)&((struct sockaddr_in6 *)pss_out)->sin6_addr; + unsigned int i; + + if (masklen > 128) { + return false; + } + for (i = 0; masklen >= 8; masklen -= 8, i++) { + *p++ = 0xff; + } + /* Deal with the partial byte. */ + *p++ &= (0xff & ~(0xff>>masklen)); + i++; + for (;i < sizeof(struct in6_addr); i++) { + *p++ = '\0'; + } + return true; + } +#endif + if (pss_in->ss_family == AF_INET) { + if (masklen > 32) { + return false; + } + ((struct sockaddr_in *)pss_out)->sin_addr.s_addr = + htonl(((0xFFFFFFFFL >> masklen) ^ 0xFFFFFFFFL)); + return true; + } + return false; +} + +/**************************************************************************** + Create a struct sockaddr_storage set to the broadcast or network address from + an incoming sockaddr_storage. +****************************************************************************/ + +static void make_bcast_or_net(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + const struct sockaddr_storage *nmask, + bool make_bcast_p) +{ + unsigned int i = 0, len = 0; + char *pmask = NULL; + char *p = NULL; + *pss_out = *pss_in; + + /* Set all zero netmask bits to 1. 
*/ +#if defined(HAVE_IPV6) + if (pss_in->ss_family == AF_INET6) { + p = (char *)&((struct sockaddr_in6 *)pss_out)->sin6_addr; + pmask = discard_const_p(char, &((struct sockaddr_in6 *)nmask)->sin6_addr); + len = 16; + } +#endif + if (pss_in->ss_family == AF_INET) { + p = (char *)&((struct sockaddr_in *)pss_out)->sin_addr; + pmask = discard_const_p(char, &((struct sockaddr_in *)nmask)->sin_addr); + len = 4; + } + + for (i = 0; i < len; i++, p++, pmask++) { + if (make_bcast_p) { + *p = (*p & *pmask) | (*pmask ^ 0xff); + } else { + /* make_net */ + *p = (*p & *pmask); + } + } +} + +void make_bcast(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + const struct sockaddr_storage *nmask) +{ + make_bcast_or_net(pss_out, pss_in, nmask, true); +} + +void make_net(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + const struct sockaddr_storage *nmask) +{ + make_bcast_or_net(pss_out, pss_in, nmask, false); +} + + +/**************************************************************************** + Try the "standard" getifaddrs/freeifaddrs interfaces. + Also gets IPv6 interfaces. +****************************************************************************/ + +/**************************************************************************** + Get the netmask address for a local interface. 
+****************************************************************************/ + +static int _get_interfaces(TALLOC_CTX *mem_ctx, struct iface_struct **pifaces) +{ + struct iface_struct *ifaces; + struct ifaddrs *iflist = NULL; + struct ifaddrs *ifptr = NULL; + int count; + int total = 0; + size_t copy_size; + + if (getifaddrs(&iflist) < 0) { + return -1; + } + + count = 0; + for (ifptr = iflist; ifptr != NULL; ifptr = ifptr->ifa_next) { + if (!ifptr->ifa_addr || !ifptr->ifa_netmask) { + continue; + } + if (!(ifptr->ifa_flags & IFF_UP)) { + continue; + } + count += 1; + } + + ifaces = talloc_array(mem_ctx, struct iface_struct, count); + if (ifaces == NULL) { + errno = ENOMEM; + return -1; + } + + /* Loop through interfaces, looking for given IP address */ + for (ifptr = iflist; ifptr != NULL; ifptr = ifptr->ifa_next) { + + if (!ifptr->ifa_addr || !ifptr->ifa_netmask) { + continue; + } + + /* Check the interface is up. */ + if (!(ifptr->ifa_flags & IFF_UP)) { + continue; + } + + memset(&ifaces[total], '\0', sizeof(ifaces[total])); + + copy_size = sizeof(struct sockaddr_in); + + ifaces[total].flags = ifptr->ifa_flags; + +#if defined(HAVE_IPV6) + if (ifptr->ifa_addr->sa_family == AF_INET6) { + copy_size = sizeof(struct sockaddr_in6); + } +#endif + + memcpy(&ifaces[total].ip, ifptr->ifa_addr, copy_size); + memcpy(&ifaces[total].netmask, ifptr->ifa_netmask, copy_size); + + if (ifaces[total].flags & (IFF_BROADCAST|IFF_LOOPBACK)) { + make_bcast(&ifaces[total].bcast, + &ifaces[total].ip, + &ifaces[total].netmask); + } else if ((ifaces[total].flags & IFF_POINTOPOINT) && + ifptr->ifa_dstaddr ) { + memcpy(&ifaces[total].bcast, + ifptr->ifa_dstaddr, + copy_size); + } else { + continue; + } + + strlcpy(ifaces[total].name, ifptr->ifa_name, + sizeof(ifaces[total].name)); + total++; + } + + freeifaddrs(iflist); + + *pifaces = ifaces; + return total; +} + +static int iface_comp(struct iface_struct *i1, struct iface_struct *i2) +{ + int r; + +#if defined(HAVE_IPV6) + /* + * If we 
have IPv6 - sort these interfaces lower + * than any IPv4 ones. + */ + if (i1->ip.ss_family == AF_INET6 && + i2->ip.ss_family == AF_INET) { + return -1; + } else if (i1->ip.ss_family == AF_INET && + i2->ip.ss_family == AF_INET6) { + return 1; + } + + if (i1->ip.ss_family == AF_INET6) { + struct sockaddr_in6 *s1 = (struct sockaddr_in6 *)&i1->ip; + struct sockaddr_in6 *s2 = (struct sockaddr_in6 *)&i2->ip; + + r = memcmp(&s1->sin6_addr, + &s2->sin6_addr, + sizeof(struct in6_addr)); + if (r) { + return r; + } + + s1 = (struct sockaddr_in6 *)&i1->netmask; + s2 = (struct sockaddr_in6 *)&i2->netmask; + + r = memcmp(&s1->sin6_addr, + &s2->sin6_addr, + sizeof(struct in6_addr)); + if (r) { + return r; + } + } +#endif + + /* AIX uses __ss_family instead of ss_family inside of + sockaddr_storage. Instead of trying to figure out which field to + use, we can just cast it to a sockaddr. + */ + + if (((struct sockaddr *)&i1->ip)->sa_family == AF_INET) { + struct sockaddr_in *s1 = (struct sockaddr_in *)&i1->ip; + struct sockaddr_in *s2 = (struct sockaddr_in *)&i2->ip; + + r = ntohl(s1->sin_addr.s_addr) - + ntohl(s2->sin_addr.s_addr); + if (r) { + return r; + } + + s1 = (struct sockaddr_in *)&i1->netmask; + s2 = (struct sockaddr_in *)&i2->netmask; + + return ntohl(s1->sin_addr.s_addr) - + ntohl(s2->sin_addr.s_addr); + } + return 0; +} + +/* this wrapper is used to remove duplicates from the interface list generated + above */ +int get_interfaces(TALLOC_CTX *mem_ctx, struct iface_struct **pifaces) +{ + struct iface_struct *ifaces; + int total, i, j; + + total = _get_interfaces(mem_ctx, &ifaces); + if (total <= 0) return total; + + /* now we need to remove duplicates */ + TYPESAFE_QSORT(ifaces, total, iface_comp); + + for (i=1;i<total;) { + if (iface_comp(&ifaces[i-1], &ifaces[i]) == 0) { + for (j=i-1;j<total-1;j++) { + ifaces[j] = ifaces[j+1]; + } + total--; + } else { + i++; + } + } + + *pifaces = ifaces; + return total; +} diff --git a/lib/socket/interfaces.h 
b/lib/socket/interfaces.h new file mode 100644 index 0000000000..b4e113dcc8 --- /dev/null +++ b/lib/socket/interfaces.h @@ -0,0 +1,44 @@ +/* + Unix SMB/CIFS implementation. + + structures for lib/netif/ + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "system/network.h" + +struct iface_struct { + char name[16]; + int flags; + struct sockaddr_storage ip; + struct sockaddr_storage netmask; + struct sockaddr_storage bcast; +}; + +struct interface; + +bool make_netmask(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + unsigned long masklen); +void make_bcast(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + const struct sockaddr_storage *nmask); +void make_net(struct sockaddr_storage *pss_out, + const struct sockaddr_storage *pss_in, + const struct sockaddr_storage *nmask); + +int get_interfaces(TALLOC_CTX *mem_ctx, struct iface_struct **pifaces); diff --git a/lib/socket/wscript_build b/lib/socket/wscript_build new file mode 100644 index 0000000000..61bde129c5 --- /dev/null +++ b/lib/socket/wscript_build @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +bld.SAMBA_LIBRARY('interfaces', + source='interfaces.c', + deps='samba-util', + private_library=True + ) diff --git a/lib/talloc/talloc.3.xml b/lib/talloc/talloc.3.xml index a327922dbe..99e8bcdb2f 100644 --- a/lib/talloc/talloc.3.xml +++ 
b/lib/talloc/talloc.3.xml @@ -783,9 +783,9 @@ if (ptr) memcpy(ptr, p, strlen(p)+1);</programlisting> </para> <para> This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or (at - your option) any later version. + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 3 of the + License, or (at your option) any later version. </para> <para> This program is distributed in the hope that it will be useful, but diff --git a/lib/talloc/talloc.c b/lib/talloc/talloc.c index 91452bfada..4700aa99e8 100644 --- a/lib/talloc/talloc.c +++ b/lib/talloc/talloc.c @@ -178,6 +178,32 @@ static struct { TC_INVALIDATE_SHRINK_VALGRIND_CHUNK(_tc, _new_size); \ } while (0) +#define TC_UNDEFINE_SHRINK_FILL_CHUNK(_tc, _new_size) do { \ + if (unlikely(talloc_fill.enabled)) { \ + size_t _flen = (_tc)->size - (_new_size); \ + char *_fptr = (char *)TC_PTR_FROM_CHUNK(_tc); \ + _fptr += (_new_size); \ + memset(_fptr, talloc_fill.fill_value, _flen); \ + } \ +} while (0) + +#if defined(DEVELOPER) && defined(VALGRIND_MAKE_MEM_UNDEFINED) +/* Mark the unused bytes as undefined */ +#define TC_UNDEFINE_SHRINK_VALGRIND_CHUNK(_tc, _new_size) do { \ + size_t _flen = (_tc)->size - (_new_size); \ + char *_fptr = (char *)TC_PTR_FROM_CHUNK(_tc); \ + _fptr += (_new_size); \ + VALGRIND_MAKE_MEM_UNDEFINED(_fptr, _flen); \ +} while (0) +#else +#define TC_UNDEFINE_SHRINK_VALGRIND_CHUNK(_tc, _new_size) do { } while (0) +#endif + +#define TC_UNDEFINE_SHRINK_CHUNK(_tc, _new_size) do { \ + TC_UNDEFINE_SHRINK_FILL_CHUNK(_tc, _new_size); \ + TC_UNDEFINE_SHRINK_VALGRIND_CHUNK(_tc, _new_size); \ +} while (0) + #if defined(DEVELOPER) && defined(VALGRIND_MAKE_MEM_UNDEFINED) /* Mark the new bytes as undefined */ #define TC_UNDEFINE_GROW_VALGRIND_CHUNK(_tc, _new_size) do { \ @@ -683,6 +709,69 @@ _PUBLIC_ void 
*_talloc_reference_loc(const void *context, const void *ptr, const static void *_talloc_steal_internal(const void *new_ctx, const void *ptr); +static inline void _talloc_free_poolmem(struct talloc_chunk *tc, + const char *location) +{ + struct talloc_chunk *pool; + void *next_tc; + unsigned int *pool_object_count; + + pool = (struct talloc_chunk *)tc->pool; + next_tc = TC_POOLMEM_NEXT_CHUNK(tc); + + tc->flags |= TALLOC_FLAG_FREE; + + /* we mark the freed memory with where we called the free + * from. This means on a double free error we can report where + * the first free came from + */ + tc->name = location; + + TC_INVALIDATE_FULL_CHUNK(tc); + + pool_object_count = talloc_pool_objectcount(pool); + + if (unlikely(*pool_object_count == 0)) { + talloc_abort("Pool object count zero!"); + return; + } + + *pool_object_count -= 1; + + if (unlikely(*pool_object_count == 1 && !(pool->flags & TALLOC_FLAG_FREE))) { + /* + * if there is just one object left in the pool + * and pool->flags does not have TALLOC_FLAG_FREE, + * it means this is the pool itself and + * the rest is available for new objects + * again. + */ + pool->pool = TC_POOL_FIRST_CHUNK(pool); + TC_INVALIDATE_POOL(pool); + } else if (unlikely(*pool_object_count == 0)) { + /* + * we mark the freed memory with where we called the free + * from. This means on a double free error we can report where + * the first free came from + */ + pool->name = location; + + TC_INVALIDATE_FULL_CHUNK(pool); + free(pool); + } else if (pool->pool == next_tc) { + /* + * if pool->pool still points to end of + * 'tc' (which is stored in the 'next_tc' variable), + * we can reclaim the memory of 'tc'. 
+ */ + pool->pool = tc; + } +} + +static inline void _talloc_free_children_internal(struct talloc_chunk *tc, + void *ptr, + const char *location); + /* internal talloc_free call */ @@ -753,41 +842,7 @@ static inline int _talloc_free_internal(void *ptr, const char *location) tc->flags |= TALLOC_FLAG_LOOP; - while (tc->child) { - /* we need to work out who will own an abandoned child - if it cannot be freed. In priority order, the first - choice is owner of any remaining reference to this - pointer, the second choice is our parent, and the - final choice is the null context. */ - void *child = TC_PTR_FROM_CHUNK(tc->child); - const void *new_parent = null_context; - struct talloc_chunk *old_parent = NULL; - if (unlikely(tc->child->refs)) { - struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs); - if (p) new_parent = TC_PTR_FROM_CHUNK(p); - } - /* finding the parent here is potentially quite - expensive, but the alternative, which is to change - talloc to always have a valid tc->parent pointer, - makes realloc more expensive where there are a - large number of children. - - The reason we need the parent pointer here is that - if _talloc_free_internal() fails due to references - or a failing destructor we need to re-parent, but - the free call can invalidate the prev pointer. 
- */ - if (new_parent == null_context && (tc->child->refs || tc->child->destructor)) { - old_parent = talloc_parent_chunk(ptr); - } - if (unlikely(_talloc_free_internal(child, location) == -1)) { - if (new_parent == null_context) { - struct talloc_chunk *p = old_parent; - if (p) new_parent = TC_PTR_FROM_CHUNK(p); - } - _talloc_steal_internal(new_parent, child); - } - } + _talloc_free_children_internal(tc, ptr, location); tc->flags |= TALLOC_FLAG_FREE; @@ -797,21 +852,10 @@ static inline int _talloc_free_internal(void *ptr, const char *location) */ tc->name = location; - if (tc->flags & (TALLOC_FLAG_POOL|TALLOC_FLAG_POOLMEM)) { - struct talloc_chunk *pool; - void *next_tc = NULL; + if (tc->flags & TALLOC_FLAG_POOL) { unsigned int *pool_object_count; - if (unlikely(tc->flags & TALLOC_FLAG_POOL)) { - pool = tc; - } else { - pool = (struct talloc_chunk *)tc->pool; - next_tc = TC_POOLMEM_NEXT_CHUNK(tc); - - TC_INVALIDATE_FULL_CHUNK(tc); - } - - pool_object_count = talloc_pool_objectcount(pool); + pool_object_count = talloc_pool_objectcount(tc); if (unlikely(*pool_object_count == 0)) { talloc_abort("Pool object count zero!"); @@ -820,26 +864,12 @@ static inline int _talloc_free_internal(void *ptr, const char *location) *pool_object_count -= 1; - if (unlikely(*pool_object_count == 1)) { - /* - * if there is just object left in the pool - * it means this is the pool itself and - * the rest is available for new objects - * again. - */ - pool->pool = TC_POOL_FIRST_CHUNK(pool); - TC_INVALIDATE_POOL(pool); - } else if (unlikely(*pool_object_count == 0)) { - TC_INVALIDATE_FULL_CHUNK(pool); - free(pool); - } else if (pool->pool == next_tc) { - /* - * if pool->pool still points to end of - * 'tc' (which is stored in the 'next_tc' variable), - * we can reclaim the memory of 'tc'. 
- */ - pool->pool = tc; + if (unlikely(*pool_object_count == 0)) { + TC_INVALIDATE_FULL_CHUNK(tc); + free(tc); } + } else if (tc->flags & TALLOC_FLAG_POOLMEM) { + _talloc_free_poolmem(tc, location); } else { TC_INVALIDATE_FULL_CHUNK(tc); free(tc); @@ -1204,21 +1234,10 @@ _PUBLIC_ void *talloc_init(const char *fmt, ...) return ptr; } -/* - this is a replacement for the Samba3 talloc_destroy_pool functionality. It - should probably not be used in new code. It's in here to keep the talloc - code consistent across Samba 3 and 4. -*/ -_PUBLIC_ void talloc_free_children(void *ptr) +static inline void _talloc_free_children_internal(struct talloc_chunk *tc, + void *ptr, + const char *location) { - struct talloc_chunk *tc; - - if (unlikely(ptr == NULL)) { - return; - } - - tc = talloc_chunk_from_ptr(ptr); - while (tc->child) { /* we need to work out who will own an abandoned child if it cannot be freed. In priority order, the first @@ -1227,13 +1246,28 @@ _PUBLIC_ void talloc_free_children(void *ptr) final choice is the null context. */ void *child = TC_PTR_FROM_CHUNK(tc->child); const void *new_parent = null_context; + struct talloc_chunk *old_parent = NULL; if (unlikely(tc->child->refs)) { struct talloc_chunk *p = talloc_parent_chunk(tc->child->refs); if (p) new_parent = TC_PTR_FROM_CHUNK(p); } - if (unlikely(talloc_free(child) == -1)) { + /* finding the parent here is potentially quite + expensive, but the alternative, which is to change + talloc to always have a valid tc->parent pointer, + makes realloc more expensive where there are a + large number of children. + + The reason we need the parent pointer here is that + if _talloc_free_internal() fails due to references + or a failing destructor we need to re-parent, but + the free call can invalidate the prev pointer. 
+ */ + if (new_parent == null_context && (tc->child->refs || tc->child->destructor)) { + old_parent = talloc_parent_chunk(ptr); + } + if (unlikely(_talloc_free_internal(child, location) == -1)) { if (new_parent == null_context) { - struct talloc_chunk *p = talloc_parent_chunk(ptr); + struct talloc_chunk *p = old_parent; if (p) new_parent = TC_PTR_FROM_CHUNK(p); } _talloc_steal_internal(new_parent, child); @@ -1241,6 +1275,24 @@ _PUBLIC_ void talloc_free_children(void *ptr) } } +/* + this is a replacement for the Samba3 talloc_destroy_pool functionality. It + should probably not be used in new code. It's in here to keep the talloc + code consistent across Samba 3 and 4. +*/ +_PUBLIC_ void talloc_free_children(void *ptr) +{ + struct talloc_chunk *tc; + + if (unlikely(ptr == NULL)) { + return; + } + + tc = talloc_chunk_from_ptr(ptr); + + _talloc_free_children_internal(tc, ptr, __location__); +} + /* Allocate a bit of memory as a child of an existing pointer */ @@ -1365,7 +1417,16 @@ _PUBLIC_ void *_talloc_realloc(const void *context, void *ptr, size_t size, cons } return ptr; } else if ((tc->size - size) < 1024) { - TC_INVALIDATE_SHRINK_CHUNK(tc, size); + /* + * if we call TC_INVALIDATE_SHRINK_CHUNK() here + * we would need to call TC_UNDEFINE_GROW_CHUNK() + * after each realloc call, which slows down + * testing a lot :-(. + * + * That is why we only mark memory as undefined here. 
+ */ + TC_UNDEFINE_SHRINK_CHUNK(tc, size); + /* do not shrink if we have less than 1k to gain */ tc->size = size; return ptr; @@ -1410,8 +1471,13 @@ _PUBLIC_ void *_talloc_realloc(const void *context, void *ptr, size_t size, cons size_t new_chunk_size = TC_ALIGN16(TC_HDR_SIZE + size); size_t space_needed; size_t space_left; + unsigned int chunk_count = *talloc_pool_objectcount(pool_tc); + + if (!(pool_tc->flags & TALLOC_FLAG_FREE)) { + chunk_count -= 1; + } - if (*talloc_pool_objectcount(pool_tc) == 2) { + if (chunk_count == 1) { /* * optimize for the case where 'tc' is the only * chunk in the pool. @@ -1438,6 +1504,7 @@ _PUBLIC_ void *_talloc_realloc(const void *context, void *ptr, size_t size, cons memmove(pool_tc->pool, tc, old_used); new_ptr = pool_tc->pool; + tc = (struct talloc_chunk *)new_ptr; TC_UNDEFINE_GROW_CHUNK(tc, size); /* @@ -1481,7 +1548,6 @@ _PUBLIC_ void *_talloc_realloc(const void *context, void *ptr, size_t size, cons } new_ptr = talloc_alloc_pool(tc, size + TC_HDR_SIZE); - *talloc_pool_objectcount(pool_tc) -= 1; if (new_ptr == NULL) { new_ptr = malloc(TC_HDR_SIZE+size); @@ -1490,21 +1556,8 @@ _PUBLIC_ void *_talloc_realloc(const void *context, void *ptr, size_t size, cons if (new_ptr) { memcpy(new_ptr, tc, MIN(tc->size,size) + TC_HDR_SIZE); - TC_INVALIDATE_FULL_CHUNK(tc); - if (*talloc_pool_objectcount(pool_tc) == 1) { - /* - * If the pool is empty now reclaim everything. - */ - pool_tc->pool = TC_POOL_FIRST_CHUNK(pool_tc); - TC_INVALIDATE_POOL(pool_tc); - } else if (next_tc == pool_tc->pool) { - /* - * If it was reallocated and tc was the last - * chunk, we can reclaim the memory of tc. 
- */ - pool_tc->pool = tc; - } + _talloc_free_poolmem(tc, __location__ "_talloc_realloc"); } } else { diff --git a/lib/talloc/testsuite.c b/lib/talloc/testsuite.c index ba583ab84e..90417c6ade 100644 --- a/lib/talloc/testsuite.c +++ b/lib/talloc/testsuite.c @@ -1128,23 +1128,31 @@ static bool test_pool(void) pool = talloc_pool(NULL, 1024); p1 = talloc_size(pool, 80); + memset(p1, 0x11, talloc_get_size(p1)); p2 = talloc_size(pool, 20); + memset(p2, 0x11, talloc_get_size(p2)); p3 = talloc_size(p1, 50); + memset(p3, 0x11, talloc_get_size(p3)); p4 = talloc_size(p3, 1000); + memset(p4, 0x11, talloc_get_size(p4)); #if 1 /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ p2_2 = talloc_realloc_size(pool, p2, 20+1); torture_assert("pool realloc 20+1", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); p2_2 = talloc_realloc_size(pool, p2, 20-1); torture_assert("pool realloc 20-1", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); p2_2 = talloc_realloc_size(pool, p2, 20-1); torture_assert("pool realloc 20-1", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); talloc_free(p3); /* this should reclaim the memory of p4 and p3 */ p2_2 = talloc_realloc_size(pool, p2, 400); torture_assert("pool realloc 400", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); talloc_free(p1); @@ -1152,37 +1160,46 @@ static bool test_pool(void) p2_2 = talloc_realloc_size(pool, p2, 800); torture_assert("pool realloc 800", p2_2 == p1, "failed: pointer not changed"); p2 = p2_2; + memset(p2, 0x11, talloc_get_size(p2)); /* this should do a malloc */ p2_2 = talloc_realloc_size(pool, p2, 1800); torture_assert("pool realloc 1800", p2_2 != p2, "failed: pointer not changed"); p2 = p2_2; + memset(p2, 0x11, talloc_get_size(p2)); /* this should reclaim the memory from the pool */ p3 = talloc_size(pool, 80); torture_assert("pool alloc 80", p3 == p1, "failed: pointer changed"); + memset(p3, 0x11, 
talloc_get_size(p3)); talloc_free(p2); talloc_free(p3); p1 = talloc_size(pool, 80); + memset(p1, 0x11, talloc_get_size(p1)); p2 = talloc_size(pool, 20); + memset(p2, 0x11, talloc_get_size(p2)); talloc_free(p1); p2_2 = talloc_realloc_size(pool, p2, 20-1); torture_assert("pool realloc 20-1", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); p2_2 = talloc_realloc_size(pool, p2, 20-1); torture_assert("pool realloc 20-1", p2_2 == p2, "failed: pointer changed"); + memset(p2, 0x11, talloc_get_size(p2)); /* this should do a malloc */ p2_2 = talloc_realloc_size(pool, p2, 1800); torture_assert("pool realloc 1800", p2_2 != p2, "failed: pointer not changed"); p2 = p2_2; + memset(p2, 0x11, talloc_get_size(p2)); /* this should reclaim the memory from the pool */ p3 = talloc_size(pool, 800); torture_assert("pool alloc 800", p3 == p1, "failed: pointer changed"); + memset(p3, 0x11, talloc_get_size(p3)); #endif /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ @@ -1191,6 +1208,73 @@ static bool test_pool(void) return true; } +static bool test_pool_steal(void) +{ + void *root; + void *pool; + void *p1, *p2; + void *p1_2, *p2_2; + size_t hdr; + size_t ofs1, ofs2; + + root = talloc_new(NULL); + pool = talloc_pool(root, 1024); + + p1 = talloc_size(pool, 4 * 16); + torture_assert("pool allocate 4 * 16", p1 != NULL, "failed "); + memset(p1, 0x11, talloc_get_size(p1)); + p2 = talloc_size(pool, 4 * 16); + torture_assert("pool allocate 4 * 16", p2 > p1, "failed: !(p2 > p1) "); + memset(p2, 0x11, talloc_get_size(p2)); + + ofs1 = PTR_DIFF(p2, p1); + hdr = ofs1 - talloc_get_size(p1); + + talloc_steal(root, p1); + talloc_steal(root, p2); + + talloc_free(pool); + + p1_2 = p1; + +#if 1 /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ + p1_2 = talloc_realloc_size(root, p1, 5 * 16); + torture_assert("pool realloc 5 * 16", p1_2 > p2, "failed: pointer not changed"); + memset(p1_2, 0x11, talloc_get_size(p1_2)); + ofs1 = PTR_DIFF(p1_2, p2); + ofs2 = 
talloc_get_size(p2) + hdr; + + torture_assert("pool realloc ", ofs1 == ofs2, "failed: pointer offset unexpected"); + + p2_2 = talloc_realloc_size(root, p2, 3 * 16); + torture_assert("pool realloc 5 * 16", p2_2 == p2, "failed: pointer changed"); + memset(p2_2, 0x11, talloc_get_size(p2_2)); +#endif /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ + + talloc_free(p1_2); + + p2_2 = p2; + +#if 1 /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ + /* now we should reclaim the full pool */ + p2_2 = talloc_realloc_size(root, p2, 8 * 16); + torture_assert("pool realloc 8 * 16", p2_2 == p1, "failed: pointer not expected"); + p2 = p2_2; + memset(p2_2, 0x11, talloc_get_size(p2_2)); + + /* now we malloc and free the full pool space */ + p2_2 = talloc_realloc_size(root, p2, 2 * 1024); + torture_assert("pool realloc 2 * 1024", p2_2 != p1, "failed: pointer not expected"); + memset(p2_2, 0x11, talloc_get_size(p2_2)); + +#endif /* this relies on ALWAYS_REALLOC == 0 in talloc.c */ + + talloc_free(p2_2); + + talloc_free(root); + + return true; +} static bool test_free_ref_null_context(void) { @@ -1290,6 +1374,8 @@ bool torture_local_talloc(struct torture_context *tctx) test_reset(); ret &= test_pool(); test_reset(); + ret &= test_pool_steal(); + test_reset(); ret &= test_free_ref_null_context(); test_reset(); ret &= test_rusty(); diff --git a/lib/tdb/common/hash.c b/lib/tdb/common/hash.c index 2472ed1ace..1eed7221d2 100644 --- a/lib/tdb/common/hash.c +++ b/lib/tdb/common/hash.c @@ -214,9 +214,7 @@ static uint32_t hashlittle( const void *key, size_t length ) u.ptr = key; if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ -#ifdef VALGRIND const uint8_t *k8; -#endif /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ while (length > 12) @@ -230,36 +228,6 @@ static uint32_t hashlittle( const void *key, size_t length ) } /*----------------------------- handle the last (probably partial) block */ 
- /* - * "k[2]&0xffffff" actually reads beyond the end of the string, but - * then masks off the part it's not allowed to read. Because the - * string is aligned, the masked-off tail is in the same word as the - * rest of the string. Every machine with memory protection I've seen - * does it on word boundaries, so is OK with this. But VALGRIND will - * still catch it and complain. The masking trick does make the hash - * noticably faster for short strings (like English words). - */ -#ifndef VALGRIND - - switch(length) - { - case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; - case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; - case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; - case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; - case 8 : b+=k[1]; a+=k[0]; break; - case 7 : b+=k[1]&0xffffff; a+=k[0]; break; - case 6 : b+=k[1]&0xffff; a+=k[0]; break; - case 5 : b+=k[1]&0xff; a+=k[0]; break; - case 4 : a+=k[0]; break; - case 3 : a+=k[0]&0xffffff; break; - case 2 : a+=k[0]&0xffff; break; - case 1 : a+=k[0]&0xff; break; - case 0 : return c; /* zero length strings require no mixing */ - } - -#else /* make valgrind happy */ - k8 = (const uint8_t *)k; switch(length) { @@ -277,9 +245,6 @@ static uint32_t hashlittle( const void *key, size_t length ) case 1 : a+=k8[0]; break; case 0 : return c; } - -#endif /* !valgrind */ - } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ const uint8_t *k8; diff --git a/lib/tdb/pytdb.c b/lib/tdb/pytdb.c index 0faba562de..3dd785e7be 100644 --- a/lib/tdb/pytdb.c +++ b/lib/tdb/pytdb.c @@ -558,6 +558,7 @@ static PyMethodDef tdb_methods[] = { { NULL } }; +void inittdb(void); void inittdb(void) { PyObject *m; diff --git a/lib/tdb/python/tests/simple.py b/lib/tdb/python/tests/simple.py index f5484a0523..2877092fe3 100644 --- a/lib/tdb/python/tests/simple.py +++ b/lib/tdb/python/tests/simple.py @@ -20,8 +20,13 @@ class OpenTdbTests(TestCase): class CloseTdbTests(TestCase): def 
test_double_close(self): - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) + # No hash size in tdb2. + if tdb.__version__.startswith("2"): + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) + else: + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) self.assertNotEqual(None, self.tdb) # ensure that double close does not crash python @@ -42,8 +47,12 @@ class SimpleTdbTests(TestCase): def setUp(self): super(SimpleTdbTests, self).setUp() - self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, - os.O_CREAT|os.O_RDWR) + if tdb.__version__.startswith("2"): + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) + else: + self.tdb = tdb.Tdb(tempfile.mkstemp()[1], 0, tdb.DEFAULT, + os.O_CREAT|os.O_RDWR) self.assertNotEqual(None, self.tdb) def tearDown(self): @@ -56,7 +65,8 @@ class SimpleTdbTests(TestCase): self.tdb.lock_all() def test_max_dead(self): - self.tdb.max_dead = 20 + if not tdb.__version__.startswith("2"): + self.tdb.max_dead = 20 def test_unlockall(self): self.tdb.lock_all() @@ -67,7 +77,8 @@ class SimpleTdbTests(TestCase): self.tdb.read_unlock_all() def test_reopen(self): - self.tdb.reopen() + if not tdb.__version__.startswith("2"): + self.tdb.reopen() def test_store(self): self.tdb.store("bar", "bla") @@ -75,7 +86,8 @@ class SimpleTdbTests(TestCase): def test_getitem(self): self.tdb["bar"] = "foo" - self.tdb.reopen() + if not tdb.__version__.startswith("2"): + self.tdb.reopen() self.assertEquals("foo", self.tdb["bar"]) def test_delete(self): @@ -91,13 +103,16 @@ class SimpleTdbTests(TestCase): self.assertRaises(KeyError, lambda: self.tdb["bla"]) def test_hash_size(self): - self.tdb.hash_size + if not tdb.__version__.startswith("2"): + self.tdb.hash_size def test_map_size(self): - self.tdb.map_size + if not tdb.__version__.startswith("2"): + self.tdb.map_size def test_freelist_size(self): - self.tdb.freelist_size + if not 
tdb.__version__.startswith("2"): + self.tdb.freelist_size def test_name(self): self.tdb.filename @@ -105,7 +120,9 @@ class SimpleTdbTests(TestCase): def test_iterator(self): self.tdb["bla"] = "1" self.tdb["brainslug"] = "2" - self.assertEquals(["bla", "brainslug"], list(self.tdb)) + l = list(self.tdb) + l.sort() + self.assertEquals(["bla", "brainslug"], l) def test_transaction_cancel(self): self.tdb["bloe"] = "2" @@ -143,17 +160,19 @@ class SimpleTdbTests(TestCase): self.assertEquals(0, len(list(self.tdb))) def test_repack(self): - self.tdb["foo"] = "abc" - self.tdb["bar"] = "def" - del self.tdb["foo"] - self.tdb.repack() + if not tdb.__version__.startswith("2"): + self.tdb["foo"] = "abc" + self.tdb["bar"] = "def" + del self.tdb["foo"] + self.tdb.repack() def test_seqnum(self): - self.tdb.enable_seqnum() - seq1 = self.tdb.seqnum - self.tdb.increment_seqnum_nonblock() - seq2 = self.tdb.seqnum - self.assertEquals(seq2-seq1, 1) + if not tdb.__version__.startswith("2"): + self.tdb.enable_seqnum() + seq1 = self.tdb.seqnum + self.tdb.increment_seqnum_nonblock() + seq2 = self.tdb.seqnum + self.assertEquals(seq2-seq1, 1) def test_len(self): self.assertEquals(0, len(list(self.tdb))) @@ -161,8 +180,12 @@ class SimpleTdbTests(TestCase): self.assertEquals(1, len(list(self.tdb))) def test_add_flags(self): - self.tdb.add_flags(tdb.NOMMAP) - self.tdb.remove_flags(tdb.NOMMAP) + if tdb.__version__.startswith("2"): + self.tdb.add_flag(tdb.NOMMAP) + self.tdb.remove_flag(tdb.NOMMAP) + else: + self.tdb.add_flags(tdb.NOMMAP) + self.tdb.remove_flags(tdb.NOMMAP) class VersionTests(TestCase): diff --git a/lib/tdb/tools/tdbrestore.c b/lib/tdb/tools/tdbrestore.c index 95ee360647..1daac63db1 100644 --- a/lib/tdb/tools/tdbrestore.c +++ b/lib/tdb/tools/tdbrestore.c @@ -170,7 +170,7 @@ static int read_rec(FILE *f, TDB_CONTEXT *tdb, int *eof) || (swallow(f, "}\n", NULL) == -1)) { goto fail; } - if (tdb_store(tdb, key, data, TDB_INSERT) == -1) { + if (tdb_store(tdb, key, data, TDB_INSERT) != 0) { 
fprintf(stderr, "TDB error: %s\n", tdb_errorstr(tdb)); goto fail; } diff --git a/lib/tdb/tools/tdbtool.c b/lib/tdb/tools/tdbtool.c index cd17f79e32..99d4841cf3 100644 --- a/lib/tdb/tools/tdbtool.c +++ b/lib/tdb/tools/tdbtool.c @@ -257,7 +257,7 @@ static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen) dbuf.dptr = (unsigned char *)data; dbuf.dsize = datalen; - if (tdb_store(tdb, key, dbuf, TDB_INSERT) == -1) { + if (tdb_store(tdb, key, dbuf, TDB_INSERT) != 0) { terror("insert failed"); } } @@ -284,7 +284,7 @@ static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen) printf("Storing key:\n"); print_rec(tdb, key, dbuf, NULL); - if (tdb_store(tdb, key, dbuf, TDB_REPLACE) == -1) { + if (tdb_store(tdb, key, dbuf, TDB_REPLACE) != 0) { terror("store failed"); } } @@ -363,7 +363,7 @@ static void move_rec(char *keyname, size_t keylen, char* tdbname) return; } - if ( tdb_store( dst_tdb, key, dbuf, TDB_REPLACE ) == -1 ) { + if (tdb_store( dst_tdb, key, dbuf, TDB_REPLACE ) != 0) { terror("failed to move record"); } else diff --git a/lib/tdb2/LICENSE b/lib/tdb2/LICENSE new file mode 100644 index 0000000000..cca7fc278f --- /dev/null +++ b/lib/tdb2/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. 
+ + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. 
+ + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. 
+ + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. 
+ + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. 
diff --git a/lib/tdb2/TODO b/lib/tdb2/TODO new file mode 100644 index 0000000000..0a9374f016 --- /dev/null +++ b/lib/tdb2/TODO @@ -0,0 +1,4 @@ +- tdb2restore, tdb2dump, tdb2backup +- tdb2tool man page +- Integrate ccan testsuite +- Integrate tdb2 testsuite diff --git a/lib/tdb2/_info b/lib/tdb2/_info new file mode 100644 index 0000000000..7213d67a22 --- /dev/null +++ b/lib/tdb2/_info @@ -0,0 +1,91 @@ +#include <string.h> +#include <stdio.h> + +/** + * tdb2 - [[WORK IN PROGRESS!]] The trivial (64bit transactional) database + * + * The tdb2 module provides an efficient keyword data mapping (usually + * within a file). It supports transactions, so the contents of the + * database is reliable even across crashes. + * + * Example: + * #include <ccan/tdb2/tdb2.h> + * #include <ccan/str/str.h> + * #include <err.h> + * #include <stdio.h> + * + * static void usage(const char *argv0) + * { + * errx(1, "Usage: %s fetch <dbfile> <key>\n" + * "OR %s store <dbfile> <key> <data>", argv0, argv0); + * } + * + * int main(int argc, char *argv[]) + * { + * struct tdb_context *tdb; + * TDB_DATA key, value; + * enum TDB_ERROR error; + * + * if (argc < 4) + * usage(argv[0]); + * + * tdb = tdb_open(argv[2], TDB_DEFAULT, O_CREAT|O_RDWR,0600, NULL); + * if (!tdb) + * err(1, "Opening %s", argv[2]); + * + * key.dptr = (void *)argv[3]; + * key.dsize = strlen(argv[3]); + * + * if (streq(argv[1], "fetch")) { + * if (argc != 4) + * usage(argv[0]); + * error = tdb_fetch(tdb, key, &value); + * if (error) + * errx(1, "fetch %s: %s", + * argv[3], tdb_errorstr(error)); + * printf("%.*s\n", value.dsize, (char *)value.dptr); + * free(value.dptr); + * } else if (streq(argv[1], "store")) { + * if (argc != 5) + * usage(argv[0]); + * value.dptr = (void *)argv[4]; + * value.dsize = strlen(argv[4]); + * error = tdb_store(tdb, key, value, 0); + * if (error) + * errx(1, "store %s: %s", + * argv[3], tdb_errorstr(error)); + * } else + * usage(argv[0]); + * + * return 0; + * } + * + * Maintainer: Rusty Russell 
<rusty@rustcorp.com.au> + * + * Author: Rusty Russell + * + * License: LGPLv3 (or later) + */ +int main(int argc, char *argv[]) +{ + if (argc != 2) + return 1; + + if (strcmp(argv[1], "depends") == 0) { + printf("ccan/asprintf\n"); + printf("ccan/hash\n"); + printf("ccan/likely\n"); + printf("ccan/asearch\n"); + printf("ccan/compiler\n"); + printf("ccan/build_assert\n"); + printf("ccan/ilog\n"); + printf("ccan/failtest\n"); + printf("ccan/tally\n"); + printf("ccan/typesafe_cb\n"); + printf("ccan/cast\n"); + printf("ccan/endian\n"); + return 0; + } + + return 1; +} diff --git a/lib/tdb2/check.c b/lib/tdb2/check.c new file mode 100644 index 0000000000..52fb188764 --- /dev/null +++ b/lib/tdb2/check.c @@ -0,0 +1,835 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "private.h" +#include <ccan/likely/likely.h> +#include <ccan/asearch/asearch.h> + +/* We keep an ordered array of offsets. 
*/ +static bool append(tdb_off_t **arr, size_t *num, tdb_off_t off) +{ + tdb_off_t *new = realloc(*arr, (*num + 1) * sizeof(tdb_off_t)); + if (!new) + return false; + new[(*num)++] = off; + *arr = new; + return true; +} + +static enum TDB_ERROR check_header(struct tdb_context *tdb, tdb_off_t *recovery, + uint64_t *features) +{ + uint64_t hash_test; + struct tdb_header hdr; + enum TDB_ERROR ecode; + + ecode = tdb_read_convert(tdb, 0, &hdr, sizeof(hdr)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + /* magic food should not be converted, so convert back. */ + tdb_convert(tdb, hdr.magic_food, sizeof(hdr.magic_food)); + + hash_test = TDB_HASH_MAGIC; + hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test)); + if (hdr.hash_test != hash_test) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "check: hash test %llu should be %llu", + (long long)hdr.hash_test, + (long long)hash_test); + } + + if (strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "check: bad magic '%.*s'", + (unsigned)sizeof(hdr.magic_food), + hdr.magic_food); + } + + /* Features which are used must be a subset of features offered. */ + if (hdr.features_used & ~hdr.features_offered) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "check: features used (0x%llx) which" + " are not offered (0x%llx)", + (long long)hdr.features_used, + (long long)hdr.features_offered); + } + + *features = hdr.features_offered; + *recovery = hdr.recovery; + if (*recovery) { + if (*recovery < sizeof(hdr) + || *recovery > tdb->file->map_size) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check:" + " invalid recovery offset %zu", + (size_t)*recovery); + } + } + + /* Don't check reserved: they *can* be used later. 
*/ + return TDB_SUCCESS; +} + +static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb, + tdb_off_t off, unsigned int group_bits, + uint64_t hprefix, + unsigned hprefix_bits, + tdb_off_t used[], + size_t num_used, + size_t *num_found, + enum TDB_ERROR (*check)(TDB_DATA, + TDB_DATA, void *), + void *data); + +static enum TDB_ERROR check_hash_chain(struct tdb_context *tdb, + tdb_off_t off, + uint64_t hash, + tdb_off_t used[], + size_t num_used, + size_t *num_found, + enum TDB_ERROR (*check)(TDB_DATA, + TDB_DATA, + void *), + void *data) +{ + struct tdb_used_record rec; + enum TDB_ERROR ecode; + + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&rec) != TDB_CHAIN_MAGIC) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash chain magic %llu", + (long long)rec_magic(&rec)); + } + + if (rec_data_length(&rec) != sizeof(struct tdb_chain)) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check:" + " Bad hash chain length %llu vs %zu", + (long long)rec_data_length(&rec), + sizeof(struct tdb_chain)); + } + if (rec_key_length(&rec) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash chain key length %llu", + (long long)rec_key_length(&rec)); + } + if (rec_hash(&rec) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash chain hash value %llu", + (long long)rec_hash(&rec)); + } + + off += sizeof(rec); + ecode = check_hash_tree(tdb, off, 0, hash, 64, + used, num_used, num_found, check, data); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + off = tdb_read_off(tdb, off + offsetof(struct tdb_chain, next)); + if (TDB_OFF_IS_ERR(off)) { + return off; + } + if (off == 0) + return TDB_SUCCESS; + (*num_found)++; + return check_hash_chain(tdb, off, hash, used, num_used, num_found, + check, data); +} + +static enum TDB_ERROR check_hash_record(struct tdb_context *tdb, + tdb_off_t off, + 
uint64_t hprefix, + unsigned hprefix_bits, + tdb_off_t used[], + size_t num_used, + size_t *num_found, + enum TDB_ERROR (*check)(TDB_DATA, + TDB_DATA, + void *), + void *data) +{ + struct tdb_used_record rec; + enum TDB_ERROR ecode; + + if (hprefix_bits >= 64) + return check_hash_chain(tdb, off, hprefix, used, num_used, + num_found, check, data); + + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&rec) != TDB_HTABLE_MAGIC) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash table magic %llu", + (long long)rec_magic(&rec)); + } + if (rec_data_length(&rec) + != sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check:" + " Bad hash table length %llu vs %llu", + (long long)rec_data_length(&rec), + (long long)sizeof(tdb_off_t) + << TDB_SUBLEVEL_HASH_BITS); + } + if (rec_key_length(&rec) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash table key length %llu", + (long long)rec_key_length(&rec)); + } + if (rec_hash(&rec) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Bad hash table hash value %llu", + (long long)rec_hash(&rec)); + } + + off += sizeof(rec); + return check_hash_tree(tdb, off, + TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, + hprefix, hprefix_bits, + used, num_used, num_found, check, data); +} + +static int off_cmp(const tdb_off_t *a, const tdb_off_t *b) +{ + /* Can overflow an int. */ + return *a > *b ? 1 + : *a < *b ? 
-1 + : 0; +} + +static uint64_t get_bits(uint64_t h, unsigned num, unsigned *used) +{ + *used += num; + + return (h >> (64 - *used)) & ((1U << num) - 1); +} + +static enum TDB_ERROR check_hash_tree(struct tdb_context *tdb, + tdb_off_t off, unsigned int group_bits, + uint64_t hprefix, + unsigned hprefix_bits, + tdb_off_t used[], + size_t num_used, + size_t *num_found, + enum TDB_ERROR (*check)(TDB_DATA, + TDB_DATA, void *), + void *data) +{ + unsigned int g, b; + const tdb_off_t *hash; + struct tdb_used_record rec; + enum TDB_ERROR ecode; + + hash = tdb_access_read(tdb, off, + sizeof(tdb_off_t) + << (group_bits + TDB_HASH_GROUP_BITS), + true); + if (TDB_PTR_IS_ERR(hash)) { + return TDB_PTR_ERR(hash); + } + + for (g = 0; g < (1 << group_bits); g++) { + const tdb_off_t *group = hash + (g << TDB_HASH_GROUP_BITS); + for (b = 0; b < (1 << TDB_HASH_GROUP_BITS); b++) { + unsigned int bucket, i, used_bits; + uint64_t h; + tdb_off_t *p; + if (group[b] == 0) + continue; + + off = group[b] & TDB_OFF_MASK; + p = asearch(&off, used, num_used, off_cmp); + if (!p) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: Invalid offset" + " %llu in hash", + (long long)off); + goto fail; + } + /* Mark it invalid. */ + *p ^= 1; + (*num_found)++; + + if (hprefix_bits == 64) { + /* Chained entries are unordered. 
*/ + if (is_subhash(group[b])) { + ecode = TDB_ERR_CORRUPT; + tdb_logerr(tdb, ecode, + TDB_LOG_ERROR, + "tdb_check: Invalid chain" + " entry subhash"); + goto fail; + } + h = hash_record(tdb, off); + if (h != hprefix) { + ecode = TDB_ERR_CORRUPT; + tdb_logerr(tdb, ecode, + TDB_LOG_ERROR, + "check: bad hash chain" + " placement" + " 0x%llx vs 0x%llx", + (long long)h, + (long long)hprefix); + goto fail; + } + ecode = tdb_read_convert(tdb, off, &rec, + sizeof(rec)); + if (ecode != TDB_SUCCESS) { + goto fail; + } + goto check; + } + + if (is_subhash(group[b])) { + uint64_t subprefix; + subprefix = (hprefix + << (group_bits + TDB_HASH_GROUP_BITS)) + + g * (1 << TDB_HASH_GROUP_BITS) + b; + + ecode = check_hash_record(tdb, + group[b] & TDB_OFF_MASK, + subprefix, + hprefix_bits + + group_bits + + TDB_HASH_GROUP_BITS, + used, num_used, num_found, + check, data); + if (ecode != TDB_SUCCESS) { + goto fail; + } + continue; + } + /* A normal entry */ + + /* Does it belong here at all? */ + h = hash_record(tdb, off); + used_bits = 0; + if (get_bits(h, hprefix_bits, &used_bits) != hprefix + && hprefix_bits) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "check: bad hash placement" + " 0x%llx vs 0x%llx", + (long long)h, + (long long)hprefix); + goto fail; + } + + /* Does it belong in this group? */ + if (get_bits(h, group_bits, &used_bits) != g) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "check: bad group %llu" + " vs %u", + (long long)h, g); + goto fail; + } + + /* Are bucket bits correct? */ + bucket = group[b] & TDB_OFF_HASH_GROUP_MASK; + if (get_bits(h, TDB_HASH_GROUP_BITS, &used_bits) + != bucket) { + used_bits -= TDB_HASH_GROUP_BITS; + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "check: bad bucket %u vs %u", + (unsigned)get_bits(h, + TDB_HASH_GROUP_BITS, + &used_bits), + bucket); + goto fail; + } + + /* There must not be any zero entries between + * the bucket it belongs in and this one! 
*/ + for (i = bucket; + i != b; + i = (i + 1) % (1 << TDB_HASH_GROUP_BITS)) { + if (group[i] == 0) { + ecode = TDB_ERR_CORRUPT; + tdb_logerr(tdb, ecode, + TDB_LOG_ERROR, + "check: bad group placement" + " %u vs %u", + b, bucket); + goto fail; + } + } + + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + goto fail; + } + + /* Bottom bits must match header. */ + if ((h & ((1 << 11)-1)) != rec_hash(&rec)) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: Bad hash magic" + " at offset %llu" + " (0x%llx vs 0x%llx)", + (long long)off, + (long long)h, + (long long)rec_hash(&rec)); + goto fail; + } + + check: + if (check) { + TDB_DATA k, d; + const unsigned char *kptr; + + kptr = tdb_access_read(tdb, + off + sizeof(rec), + rec_key_length(&rec) + + rec_data_length(&rec), + false); + if (TDB_PTR_IS_ERR(kptr)) { + ecode = TDB_PTR_ERR(kptr); + goto fail; + } + + k = tdb_mkdata(kptr, rec_key_length(&rec)); + d = tdb_mkdata(kptr + k.dsize, + rec_data_length(&rec)); + ecode = check(k, d, data); + tdb_access_release(tdb, kptr); + if (ecode != TDB_SUCCESS) { + goto fail; + } + } + } + } + tdb_access_release(tdb, hash); + return TDB_SUCCESS; + +fail: + tdb_access_release(tdb, hash); + return ecode; +} + +static enum TDB_ERROR check_hash(struct tdb_context *tdb, + tdb_off_t used[], + size_t num_used, size_t num_ftables, + int (*check)(TDB_DATA, TDB_DATA, void *), + void *data) +{ + /* Free tables also show up as used. 
*/ + size_t num_found = num_ftables; + enum TDB_ERROR ecode; + + ecode = check_hash_tree(tdb, offsetof(struct tdb_header, hashtable), + TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, + 0, 0, used, num_used, &num_found, + check, data); + if (ecode == TDB_SUCCESS) { + if (num_found != num_used) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Not all entries" + " are in hash"); + } + } + return ecode; +} + +/* Check one free record at off: its magic, that it names free table ftable, that methods->oob accepts its end offset, that its length maps to bucket, and (when prev is non-zero) that its back pointer equals prev. */ +static enum TDB_ERROR check_free(struct tdb_context *tdb, + tdb_off_t off, + const struct tdb_free_record *frec, + tdb_off_t prev, unsigned int ftable, + unsigned int bucket) +{ + enum TDB_ERROR ecode; + + if (frec_magic(frec) != TDB_FREE_MAGIC) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: offset %llu bad magic 0x%llx", + (long long)off, + (long long)frec->magic_and_prev); + } + if (frec_ftable(frec) != ftable) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: offset %llu bad freetable %u", + (long long)off, frec_ftable(frec)); + + } + + ecode = tdb->methods->oob(tdb, off + + frec_len(frec) + + sizeof(struct tdb_used_record), + false); + if (ecode != TDB_SUCCESS) { + return ecode; + } + if (size_to_bucket(frec_len(frec)) != bucket) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: offset %llu in wrong bucket" + " (%u vs %u)", + (long long)off, + bucket, size_to_bucket(frec_len(frec))); + } + if (prev && prev != frec_prev(frec)) { + /* Report the back pointer that was actually compared, not the record length. */ + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: offset %llu bad prev" + " (%llu vs %llu)", + (long long)off, + (long long)prev, (long long)frec_prev(frec)); + } + return TDB_SUCCESS; +} + +static enum TDB_ERROR check_free_table(struct tdb_context *tdb, + tdb_off_t ftable_off, + unsigned ftable_num, + tdb_off_t fr[], + size_t num_free, + size_t *num_found) +{ + struct tdb_freetable ft; + tdb_off_t h; + unsigned int i; + enum TDB_ERROR ecode; + + ecode = tdb_read_convert(tdb, ftable_off, &ft, sizeof(ft)); + if (ecode != 
TDB_SUCCESS) { + return ecode; + } + + if (rec_magic(&ft.hdr) != TDB_FTABLE_MAGIC + || rec_key_length(&ft.hdr) != 0 + || rec_data_length(&ft.hdr) != sizeof(ft) - sizeof(ft.hdr) + || rec_hash(&ft.hdr) != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Invalid header on free table"); + } + + for (i = 0; i < TDB_FREE_BUCKETS; i++) { + tdb_off_t off, prev = 0, *p, first = 0; + struct tdb_free_record f; + + h = bucket_off(ftable_off, i); + for (off = tdb_read_off(tdb, h); off; off = f.next) { + if (TDB_OFF_IS_ERR(off)) { + return off; + } + if (!first) { + off &= TDB_OFF_MASK; + first = off; + } + ecode = tdb_read_convert(tdb, off, &f, sizeof(f)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + ecode = check_free(tdb, off, &f, prev, ftable_num, i); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* FIXME: Check hash bits */ + p = asearch(&off, fr, num_free, off_cmp); + if (!p) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: Invalid offset" + " %llu in free table", + (long long)off); + } + /* Mark it invalid. */ + *p ^= 1; + (*num_found)++; + prev = off; + } + + if (first) { + /* Now we can check first back pointer. */ + ecode = tdb_read_convert(tdb, first, &f, sizeof(f)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + ecode = check_free(tdb, first, &f, prev, ftable_num, i); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + } + return TDB_SUCCESS; +} + +/* Slow, but should be very rare. 
*/ +tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off) +{ + size_t len; + enum TDB_ERROR ecode; + + for (len = 0; off + len < tdb->file->map_size; len++) { + char c; + ecode = tdb->methods->tread(tdb, off, &c, 1); + if (ecode != TDB_SUCCESS) { + return ecode; + } + if (c != 0 && c != 0x43) + break; + } + return len; +} + +static enum TDB_ERROR check_linear(struct tdb_context *tdb, + tdb_off_t **used, size_t *num_used, + tdb_off_t **fr, size_t *num_free, + uint64_t features, tdb_off_t recovery) +{ + tdb_off_t off; + tdb_len_t len; + enum TDB_ERROR ecode; + bool found_recovery = false; + + for (off = sizeof(struct tdb_header); + off < tdb->file->map_size; + off += len) { + union { + struct tdb_used_record u; + struct tdb_free_record f; + struct tdb_recovery_record r; + } rec; + /* r is larger: only get that if we need to. */ + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.f)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* If we crash after ftruncate, we can get zeroes or fill. 
*/ + if (rec.r.magic == TDB_RECOVERY_INVALID_MAGIC + || rec.r.magic == 0x4343434343434343ULL) { + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + if (recovery == off) { + found_recovery = true; + len = sizeof(rec.r) + rec.r.max_len; + } else { + len = dead_space(tdb, off); + if (TDB_OFF_IS_ERR(len)) { + return len; + } + if (len < sizeof(rec.r)) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: invalid" + " dead space at %zu", + (size_t)off); + } + + tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING, + "Dead space at %zu-%zu (of %zu)", + (size_t)off, (size_t)(off + len), + (size_t)tdb->file->map_size); + } + } else if (rec.r.magic == TDB_RECOVERY_MAGIC) { + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec.r)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + if (recovery != off) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: unexpected" + " recovery record at offset" + " %zu", + (size_t)off); + } + if (rec.r.len > rec.r.max_len) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: invalid recovery" + " length %zu", + (size_t)rec.r.len); + } + if (rec.r.eof > tdb->file->map_size) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: invalid old EOF" + " %zu", (size_t)rec.r.eof); + } + found_recovery = true; + len = sizeof(rec.r) + rec.r.max_len; + } else if (frec_magic(&rec.f) == TDB_FREE_MAGIC) { + len = sizeof(rec.u) + frec_len(&rec.f); + if (off + len > tdb->file->map_size) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: free overlength" + " %llu at offset %llu", + (long long)len, + (long long)off); + } + /* This record should be in free lists. 
*/ + if (frec_ftable(&rec.f) != TDB_FTABLE_NONE + && !append(fr, num_free, off)) { + return tdb_logerr(tdb, TDB_ERR_OOM, + TDB_LOG_ERROR, + "tdb_check: tracking %zu'th" + " free record.", *num_free); + } + } else if (rec_magic(&rec.u) == TDB_USED_MAGIC + || rec_magic(&rec.u) == TDB_CHAIN_MAGIC + || rec_magic(&rec.u) == TDB_HTABLE_MAGIC + || rec_magic(&rec.u) == TDB_FTABLE_MAGIC) { + uint64_t klen, dlen, extra; + + /* This record is used! */ + if (!append(used, num_used, off)) { + return tdb_logerr(tdb, TDB_ERR_OOM, + TDB_LOG_ERROR, + "tdb_check: tracking %zu'th" + " used record.", *num_used); + } + + klen = rec_key_length(&rec.u); + dlen = rec_data_length(&rec.u); + extra = rec_extra_padding(&rec.u); + + len = sizeof(rec.u) + klen + dlen + extra; + if (off + len > tdb->file->map_size) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: used overlength" + " %llu at offset %llu", + (long long)len, + (long long)off); + } + + if (len < sizeof(rec.f)) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: too short record" + " %llu at %llu", + (long long)len, + (long long)off); + } + + /* Check that records have correct 0 at end (but may + * not in future). */ + if (extra && !features) { + const char *p; + char c; + p = tdb_access_read(tdb, off + sizeof(rec.u) + + klen + dlen, 1, false); + if (TDB_PTR_IS_ERR(p)) + return TDB_PTR_ERR(p); + c = *p; + tdb_access_release(tdb, p); + + if (c != '\0') { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check:" + " non-zero extra" + " at %llu", + (long long)off); + } + } + } else { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "tdb_check: Bad magic 0x%llx" + " at offset %zu", + (long long)rec_magic(&rec.u), + (size_t)off); + } + } + + /* We must have found recovery area if there was one. 
*/ + if (recovery != 0 && !found_recovery) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: expected a recovery area at %zu", + (size_t)recovery); + } + + return TDB_SUCCESS; +} + +enum TDB_ERROR tdb_check_(struct tdb_context *tdb, + enum TDB_ERROR (*check)(TDB_DATA, TDB_DATA, void *), + void *data) +{ + tdb_off_t *fr = NULL, *used = NULL, ft, recovery; + size_t num_free = 0, num_used = 0, num_found = 0, num_ftables = 0; + uint64_t features; + enum TDB_ERROR ecode; + + ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false); + if (ecode != TDB_SUCCESS) { + return tdb->last_error = ecode; + } + + ecode = tdb_lock_expand(tdb, F_RDLCK); + if (ecode != TDB_SUCCESS) { + tdb_allrecord_unlock(tdb, F_RDLCK); + return tdb->last_error = ecode; + } + + ecode = check_header(tdb, &recovery, &features); + if (ecode != TDB_SUCCESS) + goto out; + + /* First we do a linear scan, checking all records. */ + ecode = check_linear(tdb, &used, &num_used, &fr, &num_free, features, + recovery); + if (ecode != TDB_SUCCESS) + goto out; + + for (ft = first_ftable(tdb); ft; ft = next_ftable(tdb, ft)) { + if (TDB_OFF_IS_ERR(ft)) { + ecode = ft; + goto out; + } + ecode = check_free_table(tdb, ft, num_ftables, fr, num_free, + &num_found); + if (ecode != TDB_SUCCESS) + goto out; + num_ftables++; + } + + /* FIXME: Check key uniqueness? */ + ecode = check_hash(tdb, used, num_used, num_ftables, check, data); + if (ecode != TDB_SUCCESS) + goto out; + + if (num_found != num_free) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_check: Not all entries are in" + " free table"); + } + +out: + tdb_allrecord_unlock(tdb, F_RDLCK); + tdb_unlock_expand(tdb, F_RDLCK); + free(fr); + free(used); + return tdb->last_error = ecode; +} diff --git a/lib/tdb2/doc/TDB1_porting.txt b/lib/tdb2/doc/TDB1_porting.txt new file mode 100644 index 0000000000..90ba249738 --- /dev/null +++ b/lib/tdb2/doc/TDB1_porting.txt @@ -0,0 +1,44 @@ +Interface differences between TDB1 and TDB2. 
+ +- tdb2 uses 'struct tdb_data', tdb1 uses 'struct TDB_DATA'. Use the + TDB_DATA typedef if you want portability between the two. + +- tdb2 functions return 0 on success, and a negative error on failure, + whereas tdb1 functions returned 0 on success, and -1 on failure. + tdb1 then used tdb_error() to determine the error; this is also + supported in tdb2 to ease backwards compatibility, though the other + form is preferred. + +- tdb2's tdb_fetch() returns an error, tdb1's returned the data directly + (or tdb_null, and you were supposed to check tdb_error() to find out why). + +- tdb2's tdb_nextkey() frees the old key's dptr, in tdb1 you needed to do + this manually. + +- tdb1's tdb_open/tdb_open_ex took an explicit hash size. tdb2's hash table + resizes as required. + +- tdb2 uses a linked list of attribute structures to implement logging and + alternate hashes. tdb1 used tdb_open_ex, which was not extensible. + +- tdb2 does locking on read-only databases (ie. O_RDONLY passed to tdb_open). + tdb1 did not: use the TDB_NOLOCK flag if you want to suppress locking. + +- tdb2's log function is simpler than tdb1's log function. The string is + already formatted, and it takes an enum tdb_log_level not a tdb_debug_level, + and which has only three values: TDB_LOG_ERROR, TDB_LOG_USE_ERROR and + TDB_LOG_WARNING. + +- tdb2 provides tdb_deq() for comparing two struct tdb_data. + +- tdb2's tdb_name() returns a copy of the name even for TDB_INTERNAL dbs. + +- tdb2 does not need tdb_reopen() or tdb_reopen_all().
If you call + fork() during certain operations the child should close the + tdb, or complete the operations before continuing to use the tdb: + + tdb_transaction_start(): child must call tdb_transaction_cancel() + tdb_lockall(): child must call tdb_unlockall() + tdb_lockall_read(): child must call tdb_unlockall_read() + tdb_chainlock(): child must call tdb_chainunlock() + tdb_parse() callback: child must return from tdb_parse() diff --git a/lib/tdb2/doc/design-1.3.txt b/lib/tdb2/doc/design-1.3.txt new file mode 100644 index 0000000000..f81ecf7885 --- /dev/null +++ b/lib/tdb2/doc/design-1.3.txt @@ -0,0 +1,1049 @@ +TDB2: A Redesigning The Trivial DataBase + +Rusty Russell, IBM Corporation + +27-April-2010 + +Abstract + +The Trivial DataBase on-disk format is 32 bits; with usage cases +heading towards the 4G limit, that must change. This required +breakage provides an opportunity to revisit TDB's other design +decisions and reassess them. + +1 Introduction + +The Trivial DataBase was originally written by Andrew Tridgell as +a simple key/data pair storage system with the same API as dbm, +but allowing multiple readers and writers while being small +enough (< 1000 lines of C) to include in SAMBA. The simple design +created in 1999 has proven surprisingly robust and performant, +used in Samba versions 3 and 4 as well as numerous other +projects. Its useful life was greatly increased by the +(backwards-compatible!) addition of transaction support in 2005. + +The wider variety and greater demands of TDB-using code have led +to some organic growth of the API, as well as some compromises on +the implementation. None of these, by themselves, are seen as +show-stoppers, but the cumulative effect is a loss of elegance +over the initial, simple TDB implementation.
Here is a table of +the approximate number of lines of implementation code and number +of API functions at the end of each year: + + ++-----------+----------------+--------------------------------+ +| Year End | API Functions | Lines of C Code Implementation | ++-----------+----------------+--------------------------------+ ++-----------+----------------+--------------------------------+ +| 1999 | 13 | 1195 | ++-----------+----------------+--------------------------------+ +| 2000 | 24 | 1725 | ++-----------+----------------+--------------------------------+ +| 2001 | 32 | 2228 | ++-----------+----------------+--------------------------------+ +| 2002 | 35 | 2481 | ++-----------+----------------+--------------------------------+ +| 2003 | 35 | 2552 | ++-----------+----------------+--------------------------------+ +| 2004 | 40 | 2584 | ++-----------+----------------+--------------------------------+ +| 2005 | 38 | 2647 | ++-----------+----------------+--------------------------------+ +| 2006 | 52 | 3754 | ++-----------+----------------+--------------------------------+ +| 2007 | 66 | 4398 | ++-----------+----------------+--------------------------------+ +| 2008 | 71 | 4768 | ++-----------+----------------+--------------------------------+ +| 2009 | 73 | 5715 | ++-----------+----------------+--------------------------------+ + + +This review is an attempt to catalog and address all the known +issues with TDB and create solutions which address the problems +without significantly increasing complexity; all involved are far +too aware of the dangers of second system syndrome in rewriting a +successful project like this. + +2 API Issues + +2.1 tdb_open_ex Is Not Expandable + +The tdb_open() call was expanded to tdb_open_ex(), which added an +optional hashing function and an optional logging function +argument. Additional arguments to open would require the +introduction of a tdb_open_ex2 call etc. 
+ +2.1.1 Proposed Solution + +tdb_open() will take a linked-list of attributes: + +enum tdb_attribute { + + TDB_ATTRIBUTE_LOG = 0, + + TDB_ATTRIBUTE_HASH = 1 + +}; + +struct tdb_attribute_base { + + enum tdb_attribute attr; + + union tdb_attribute *next; + +}; + +struct tdb_attribute_log { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG +*/ + + tdb_log_func log_fn; + + void *log_private; + +}; + +struct tdb_attribute_hash { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH +*/ + + tdb_hash_func hash_fn; + + void *hash_private; + +}; + +union tdb_attribute { + + struct tdb_attribute_base base; + + struct tdb_attribute_log log; + + struct tdb_attribute_hash hash; + +}; + +This allows future attributes to be added, even if this expands +the size of the union. + +2.2 tdb_traverse Makes Impossible Guarantees + +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, +and it was thought that it was important to guarantee that all +records which exist at the start and end of the traversal would +be included, and no record would be included twice. + +This adds complexity (see[Reliable-Traversal-Adds]) and does not +work anyway for records which are altered (in particular, those +which are expanded may be effectively deleted and re-added behind +the traversal). + +2.2.1 <traverse-Proposed-Solution>Proposed Solution + +Abandon the guarantee. You will see every record if no changes +occur during your traversal, otherwise you will see some subset. +You can prevent changes by using a transaction or the locking +API. + +2.3 Nesting of Transactions Is Fraught + +TDB has alternated between allowing nested transactions and not +allowing them. Various paths in the Samba codebase assume that +transactions will nest, and in a sense they can: the operation is +only committed to disk when the outer transaction is committed. +There are two problems, however: + +1. 
Canceling the inner transaction will cause the outer + transaction commit to fail, and will not undo any operations + since the inner transaction began. This problem is soluble with + some additional internal code. + +2. An inner transaction commit can be cancelled by the outer + transaction. This is desirable in the way which Samba's + database initialization code uses transactions, but could be a + surprise to any users expecting a successful transaction commit + to expose changes to others. + +The current solution is to specify the behavior at tdb_open(), +with the default currently that nested transactions are allowed. +This flag can also be changed at runtime. + +2.3.1 Proposed Solution + +Given the usage patterns, it seems that the “least-surprise” +behavior of disallowing nested transactions should become the +default. Additionally, it seems the outer transaction is the only +code which knows whether inner transactions should be allowed, so +a flag to indicate this could be added to tdb_transaction_start. +However, this behavior can be simulated with a wrapper which uses +tdb_add_flags() and tdb_remove_flags(), so the API should not be +expanded for this relatively-obscure case. + +2.4 Incorrect Hash Function is Not Detected + +tdb_open_ex() allows the calling code to specify a different hash +function to use, but does not check that all other processes +accessing this tdb are using the same hash function. The result +is that records are missing from tdb_fetch(). + +2.4.1 Proposed Solution + +The header should contain an example hash result (eg. the hash of +0xdeadbeef), and tdb_open_ex() should check that the given hash +function produces the same answer, or fail the tdb_open call. + +2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation + +In response to scalability issues with the free list ([TDB-Freelist-Is] +) two API workarounds have been incorporated in TDB: +tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. 
The +latter actually calls the former with an argument of “5”. + +This code allows deleted records to accumulate without putting +them in the free list. On delete we iterate through each chain +and free them in a batch if there are more than max_dead entries. +These are never otherwise recycled except as a side-effect of a +tdb_repack. + +2.5.1 Proposed Solution + +With the scalability problems of the freelist solved, this API +can be removed. The TDB_VOLATILE flag may still be useful as a +hint that store and delete of records will be at least as common +as fetch in order to allow some internal tuning, but initially +will become a no-op. + +2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times + In The Same Process + +No process can open the same TDB twice; we check and disallow it. +This is an unfortunate side-effect of fcntl locks, which operate +on a per-file rather than per-file-descriptor basis, and do not +nest. Thus, closing any file descriptor on a file clears all the +locks obtained by this process, even if they were placed using a +different file descriptor! + +Note that even if this were solved, deadlock could occur if +operations were nested: this is a more manageable programming +error in most cases. + +2.6.1 Proposed Solution + +We could lobby POSIX to fix the perverse rules, or at least lobby +Linux to violate them so that the most common implementation does +not have this restriction. This would be a generally good idea +for other fcntl lock users. + +Samba uses a wrapper which hands out the same tdb_context to +multiple callers if this happens, and does simple reference +counting. We should do this inside the tdb library, which already +emulates lock nesting internally; it would need to recognize when +deadlock occurs within a single process. This would create a new +failure mode for tdb operations (while we currently handle +locking failures, they are impossible in normal use and a process +encountering them can do little but give up). 
+ +I do not see benefit in an additional tdb_open flag to indicate +whether re-opening is allowed, although there may be some +benefit to adding a call to detect when a tdb_context is shared, +to allow others to create such an API. + +2.7 TDB API Is Not POSIX Thread-safe + +The TDB API uses an error code which can be queried after an +operation to determine what went wrong. This programming model +does not work with threads, unless specific additional guarantees +are given by the implementation. In addition, even +otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot] +). + +2.7.1 Proposed Solution + +Rearchitecting the API to include a tdb_errcode pointer would be a +great deal of churn; we are better to guarantee that the +tdb_errcode is per-thread so the current programming model can be +maintained. + +This requires dynamic per-thread allocations, which is awkward +with POSIX threads (pthread_key_create space is limited and we +cannot simply allocate a key for every TDB). + +Internal locking is required to make sure that fcntl locks do not +overlap between threads, and also that the global list of tdbs is +maintained. + +The aim is that building tdb with -DTDB_PTHREAD will result in a +pthread-safe version of the library, and otherwise no overhead +will exist. + +2.8 *_nonblock Functions And *_mark Functions Expose + Implementation + +CTDB[footnote: +Clustered TDB, see http://ctdb.samba.org +] wishes to operate on TDB in a non-blocking manner. This is +currently done as follows: + +1. Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). If this fails: + +2. Fork a child process, and wait for it to call the normal + variant (eg. tdb_lockall). + +3. If the child succeeds, call the _mark variant to indicate we + already have the locks (eg. tdb_lockall_mark). + +4. Upon completion, tell the child to release the locks (eg. + tdb_unlockall). + +5. Indicate to tdb that it should consider the locks removed (eg. 
+ tdb_unlockall_mark). + +There are several issues with this approach. Firstly, adding two +new variants of each function clutters the API for an obscure +use, and so not all functions have three variants. Secondly, it +assumes that all paths of the functions ask for the same locks, +otherwise the parent process will have to get a lock which the +child doesn't have under some circumstances. I don't believe this +is currently the case, but it constrains the implementation. + +2.8.1 <Proposed-Solution-locking-hook>Proposed Solution + +Implement a hook for locking methods, so that the caller can +control the calls to create and remove fcntl locks. In this +scenario, ctdbd would operate as follows: + +1. Call the normal API function, eg tdb_lockall(). + +2. When the lock callback comes in, check if the child has the + lock. Initially, this is always false. If so, return 0. + Otherwise, try to obtain it in non-blocking mode. If that + fails, return EWOULDBLOCK. + +3. Release locks in the unlock callback as normal. + +4. If tdb_lockall() fails, see if we recorded a lock failure; if + so, call the child to repeat the operation. + +5. The child records what locks it obtains, and returns that + information to the parent. + +6. When the child has succeeded, goto 1. + +This is flexible enough to handle any potential locking scenario, +even when lock requirements change. It can be optimized so that +the parent does not release locks, just tells the child which +locks it doesn't need to obtain. + +It also keeps the complexity out of the API, and in ctdbd where +it is needed. + +2.9 tdb_chainlock Functions Expose Implementation + +tdb_chainlock locks some number of records, including the record +indicated by the given key. This gave atomicity guarantees; +no-one can start a transaction, alter, read or delete that key +while the lock is held. 
+ +It also makes the same guarantee for any other key in the chain, +which is an internal implementation detail and potentially a +cause for deadlock. + +2.9.1 Proposed Solution + +None. It would be nice to have an explicit single entry lock +which affected no other keys. Unfortunately, this won't work for +an entry which doesn't exist. Thus while chainlock may be +implemented more efficiently for the existing case, it will still +have overlap issues with the non-existing case. So it is best to +keep the current (lack of) guarantee about which records will be +affected to avoid constraining our implementation. + +2.10 Signal Handling is Not Race-Free + +The tdb_setalarm_sigptr() call allows the caller's signal handler +to indicate that the tdb locking code should return with a +failure, rather than trying again when a signal is received (and +errno == EAGAIN). This is usually used to implement timeouts. + +Unfortunately, this does not work in the case where the signal is +received before the tdb code enters the fcntl() call to place the +lock: the code will sleep within the fcntl() code, unaware that +the signal wants it to exit. In the case of long timeouts, this +does not happen in practice. + +2.10.1 Proposed Solution + +The locking hooks proposed in[Proposed-Solution-locking-hook] +would allow the user to decide on whether to fail the lock +acquisition on a signal. This allows the caller to choose their +own compromise: they could narrow the race by checking +immediately before the fcntl call.[footnote: +It may be possible to make this race-free in some implementations +by having the signal handler alter the struct flock to make it +invalid. This will cause the fcntl() lock call to fail with +EINVAL if the signal occurs before the kernel is entered, +otherwise EAGAIN. 
+] + +2.11 The API Uses Gratuitous Typedefs, Capitals + +typedefs are useful for providing source compatibility when types +can differ across implementations, or arguably in the case of +function pointer definitions which are hard for humans to parse. +Otherwise it is simply obfuscation and pollutes the namespace. + +Capitalization is usually reserved for compile-time constants and +macros. + + TDB_CONTEXT There is no reason to use this over 'struct + tdb_context'; the definition isn't visible to the API user + anyway. + + TDB_DATA There is no reason to use this over struct TDB_DATA; + the struct needs to be understood by the API user. + + struct TDB_DATA This would normally be called 'struct + tdb_data'. + + enum TDB_ERROR Similarly, this would normally be enum + tdb_error. + +2.11.1 Proposed Solution + +None. Introducing lower case variants would please pedants like +myself, but if it were done the existing ones should be kept. +There is little point forcing a purely cosmetic change upon tdb +users. + +2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The + Private Pointer + +For API compatibility reasons, the logging function needs to call +tdb_get_logging_private() to retrieve the pointer registered by +the tdb_open_ex for logging. + +2.12.1 Proposed Solution + +It should simply take an extra argument, since we are prepared to +break the API/ABI. + +2.13 Various Callback Functions Are Not Typesafe + +The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take] + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read +and tdb_check all take void * and must internally convert it to +the argument type they were expecting. + +If this type changes, the compiler will not produce warnings on +the callers, since it only sees void *. + +2.13.1 Proposed Solution + +With careful use of macros, we can create callback functions +which give a warning when used on gcc and the types of the +callback and its private argument differ. 
Unsupported compilers +will not give a warning, which is no worse than now. In addition, +the callbacks become clearer, as they need not use void * for +their parameter. + +See CCAN's typesafe_cb module at +http://ccan.ozlabs.org/info/typesafe_cb.html + +2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, + tdb_reopen_all Problematic + +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB +file should be cleared if the caller discovers it is the only +process with the TDB open. However, if any caller does not +specify TDB_CLEAR_IF_FIRST it will not be detected, so will have +the TDB erased underneath them (usually resulting in a crash). + +There is a similar issue on fork(); if the parent exits (or +otherwise closes the tdb) before the child calls tdb_reopen_all() +to establish the lock used to indicate the TDB is opened by +someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe +it alone has opened the TDB and will erase it. + +2.14.1 Proposed Solution + +Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but +see [TDB_CLEAR_IF_FIRST-Imposes-Performance]. + +3 Performance And Scalability Issues + +3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST + Imposes Performance Penalty + +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is +placed at offset 4 (aka. the ACTIVE_LOCK). While these locks +never conflict in normal tdb usage, they do add substantial +overhead for most fcntl lock implementations when the kernel +scans to detect if a lock conflict exists. This is often a single +linked list, making the time to acquire and release a fcntl lock +O(N) where N is the number of processes with the TDB open, not +the number actually doing work. 
+ +In a Samba server it is common to have huge numbers of clients +sitting idle, and thus they have weaned themselves off the +TDB_CLEAR_IF_FIRST flag.[footnote: +There is a flag to tdb_reopen_all() which is used for this +optimization: if the parent process will outlive the child, the +child does not need the ACTIVE_LOCK. This is a workaround for +this very performance issue. +] + +3.1.1 Proposed Solution + +Remove the flag. It was a neat idea, but even trivial servers +tend to know when they are initializing for the first time and +can simply unlink the old tdb at that point. + +3.2 TDB Files Have a 4G Limit + +This seems to be becoming an issue (so much for “trivial”!), +particularly for ldb. + +3.2.1 Proposed Solution + +A new, incompatible TDB format which uses 64 bit offsets +internally rather than 32 bit as now. For simplicity of endian +conversion (which TDB does on the fly if required), all values +will be 64 bit on disk. In practice, some upper bits may be used +for other purposes, but at least 56 bits will be available for +file offsets. + +tdb_open() will automatically detect the old version, and even +create them if TDB_VERSION6 is specified to tdb_open. + +32 bit processes will still be able to access TDBs larger than 4G +(assuming that their off_t allows them to seek to 64 bits), they +will gracefully fall back as they fail to mmap. This can happen +already with large TDBs. + +Old versions of tdb will fail to open the new TDB files (since 28 +August 2009, commit 398d0c29290: prior to that any unrecognized +file format would be erased and initialized as a fresh tdb!) + +3.3 TDB Records Have a 4G Limit + +This has not been a reported problem, and the API uses size_t +which can be 64 bit on 64 bit platforms. However, other limits +may have made such an issue moot. 
+ +3.3.1 Proposed Solution + +Record sizes will be 64 bit, with an error returned on 32 bit +platforms which try to access such records (the current +implementation would return TDB_ERR_OOM in a similar case). It +seems unlikely that 32 bit keys will be a limitation, so the +implementation may not support this (see [sub:Records-Incur-A]). + +3.4 Hash Size Is Determined At TDB Creation Time + +TDB contains a number of hash chains in the header; the number is +specified at creation time, and defaults to 131. This is such a +bottleneck on large databases (as each hash chain gets quite +long), that LDB uses 10,000 for this hash. In general it is +impossible to know what the 'right' answer is at database +creation time. + +3.4.1 Proposed Solution + +After comprehensive performance testing on various scalable hash +variants[footnote: +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 +This was annoying because I was previously convinced that an +expanding tree of hashes would be very close to optimal. +], it became clear that it is hard to beat a straight linear hash +table which doubles in size when it reaches saturation. There are +three details which become important: + +1. On encountering a full bucket, we use the next bucket. + +2. Extra hash bits are stored with the offset, to reduce + comparisons. + +3. A marker entry is used on deleting an entry. + +The doubling of the table must be done under a transaction; we +will not reduce it on deletion, so it will be an unusual case. It +will either be placed at the head (other entries will be moved +out the way so we can expand). We could have a pointer in the +header to the current hashtable location, but that pointer would +have to be read frequently to check for hashtable moves. + +The locking for this is slightly more complex than the chained +case; we currently have one lock per bucket, and that means we +would need to expand the lock if we overflow to the next bucket. 
+The frequency of such collisions will affect our locking +heuristics: we can always lock more buckets than we need. + +One possible optimization is to only re-check the hash size on an +insert or a lookup miss. + +3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended + +TDB uses a single linked list for the free list. Allocation +occurs as follows, using heuristics which have evolved over time: + +1. Get the free list lock for this whole operation. + +2. Multiply length by 1.25, so we always over-allocate by 25%. + +3. Set the slack multiplier to 1. + +4. Examine the current freelist entry: if it is > length but < + the current best case, remember it as the best case. + +5. Multiply the slack multiplier by 1.05. + +6. If our best fit so far is less than length * slack multiplier, + return it. The slack will be turned into a new free record if + it's large enough. + +7. Otherwise, go onto the next freelist entry. + +Deleting a record occurs as follows: + +1. Lock the hash chain for this whole operation. + +2. Walk the chain to find the record, keeping the prev pointer + offset. + +3. If max_dead is non-zero: + + (a) Walk the hash chain again and count the dead records. + + (b) If it's more than max_dead, bulk free all the dead ones + (similar to steps 4 and below, but the lock is only obtained + once). + + (c) Simply mark this record as dead and return. + +4. Get the free list lock for the remainder of this operation. + +5. <right-merging>Examine the following block to see if it is + free; if so, enlarge the current block and remove that block + from the free list. This was disabled, as removal from the free + list was O(entries-in-free-list). + +6. Examine the preceding block to see if it is free: for this + reason, each block has a 32-bit tailer which indicates its + length. If it is free, expand it to cover our new block and + return. + +7. Otherwise, prepend ourselves to the free list. 
+ +Disabling right-merging (step [right-merging]) causes +fragmentation; the other heuristics proved insufficient to +address this, so the final answer to this was that when we expand +the TDB file inside a transaction commit, we repack the entire +tdb. + +The single list lock limits our allocation rate; due to the other +issues this is not currently seen as a bottleneck. + +3.5.1 Proposed Solution + +The first step is to remove all the current heuristics, as they +obviously interact, then examine them once the lock contention is +addressed. + +The free list must be split to reduce contention. Assuming +perfect free merging, we can at most have 1 free list entry for +each entry. This implies that the number of free lists is related +to the size of the hash table, but as it is rare to walk a large +number of free list entries we can use far fewer, say 1/32 of the +number of hash buckets. + +There are various benefits in using per-size free lists (see [sub:TDB-Becomes-Fragmented] +) but it's not clear this would reduce contention in the common +case where all processes are allocating/freeing the same size. +Thus we almost certainly need to divide in other ways: the most +obvious is to divide the file into zones, and using a free list +(or set of free lists) for each. This approximates address +ordering. + +Note that this means we need to split the free lists when we +expand the file; this is probably acceptable when we double the +hash table size, since that is such an expensive operation +already. In the case of increasing the file size, there is an +optimization we can use: if we use M in the formula above as the +file size rounded up to the next power of 2, we only need +reshuffle free lists when the file size crosses a power of 2 +boundary, and reshuffling the free lists is trivial: we simply +merge every consecutive pair of free lists. + +The basic algorithm is as follows. Freeing is simple: + +1. Identify the correct zone. + +2. Lock the corresponding list. 
+ +3. Re-check the zone (we didn't have a lock, sizes could have + changed): relock if necessary. + +4. Place the freed entry in the list for that zone. + +Allocation is a little more complicated, as we perform delayed +coalescing at this point: + +1. Pick a zone: either the zone we last freed into, or based on a + “random” number. + +2. Lock the corresponding list. + +3. Re-check the zone: relock if necessary. + +4. If the top entry is large enough, remove it from the list and + return it. + +5. Otherwise, coalesce entries in the list. + + (a) + + (b) + + (c) + + (d) + +6. If there was no entry large enough, unlock the list and try + the next zone. + +7. + +8. + +9. If no zone satisfies, expand the file. + +This optimizes rapid insert/delete of free list entries by not +coalescing them all the time. First-fit address +ordering seems to be fairly good for keeping fragmentation low +(see [sub:TDB-Becomes-Fragmented]). Note that address ordering +does not need a tailer to coalesce, though if we needed one we +could have one cheaply: see [sub:Records-Incur-A]. + + + +I anticipate that the number of entries in each free zone would +be small, but it might be worth using one free entry to hold +pointers to the others for cache efficiency. + +3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented + +Much of this is a result of allocation strategy[footnote: +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 +ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps +] and deliberate hobbling of coalescing; internal fragmentation +(aka overallocation) is deliberately set at 25%, and external +fragmentation is only cured by the decision to repack the entire +db when a transaction commit needs to enlarge the file. + +3.6.1 Proposed Solution + +The 25% overhead on allocation works in practice for ldb because +indexes tend to expand by one record at a time. 
This internal +fragmentation can be resolved by having an “expanded” bit in the +header to note entries that have previously expanded, and +allocating more space for them. + +There is a spectrum of possible solutions for external +fragmentation: one is to use a fragmentation-avoiding allocation +strategy such as best-fit address-order allocator. The other end +of the spectrum would be to use a bump allocator (very fast and +simple) and simply repack the file when we reach the end. + +There are three problems with efficient fragmentation-avoiding +allocators: they are non-trivial, they tend to use a single free +list for each size, and there's no evidence that tdb allocation +patterns will match those recorded for general allocators (though +it seems likely). + +Thus we don't spend too much effort on external fragmentation; we +will be no worse than the current code if we need to repack on +occasion. More effort is spent on reducing freelist contention, +and reducing overhead. + +3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead + +Each TDB record has a header as follows: + +struct tdb_record { + + tdb_off_t next; /* offset of the next record in the list +*/ + + tdb_len_t rec_len; /* total byte length of record */ + + tdb_len_t key_len; /* byte length of key */ + + tdb_len_t data_len; /* byte length of data */ + + uint32_t full_hash; /* the full 32 bit hash of the key */ + + uint32_t magic; /* try to catch errors */ + + /* the following union is implied: + + union { + + char record[rec_len]; + + struct { + + char key[key_len]; + + char data[data_len]; + + } + + uint32_t totalsize; (tailer) + + } + + */ + +}; + +Naively, this would double to a 56-byte overhead on a 64 bit +implementation. + +3.7.1 Proposed Solution + +We can use various techniques to reduce this for an allocated +block: + +1. The 'next' pointer is not required, as we are using a flat + hash table. + +2. 
'rec_len' can instead be expressed as an addition to key_len + and data_len (it accounts for wasted or overallocated length in + the record). Since the record length is always a multiple of 8, + we can conveniently fit it in 32 bits (representing up to 35 + bits). + +3. 'key_len' and 'data_len' can be reduced. I'm unwilling to + restrict 'data_len' to 32 bits, but instead we can combine the + two into one 64-bit field and using a 5 bit value which + indicates at what bit to divide the two. Keys are unlikely to + scale as fast as data, so I'm assuming a maximum key size of 32 + bits. + +4. 'full_hash' is used to avoid a memcmp on the “miss” case, but + this is diminishing returns after a handful of bits (at 10 + bits, it reduces 99.9% of false memcmp). As an aside, as the + lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. + +5. 'magic' does not need to be enlarged: it currently reflects + one of 5 values (used, free, dead, recovery, and + unused_recovery). It is useful for quick sanity checking + however, and should not be eliminated. + +6. 'tailer' is only used to coalesce free blocks (so a block to + the right can find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of + the following block (and the tailer only exists in free + blocks).[footnote: +This technique from Thomas Standish. Data Structure Techniques. +Addison-Wesley, Reading, Massachusetts, 1980. +] The current proposed coalescing algorithm doesn't need this, + however. + +This produces a 16 byte used header like this: + +struct tdb_used_record { + + uint32_t magic : 16, + + prev_is_free: 1, + + key_data_divide: 5, + + top_hash: 10; + + uint32_t extra_octets; + + uint64_t key_and_data_len; + +}; + +And a free record like this: + +struct tdb_free_record { + + uint32_t free_magic; + + uint64_t total_length; + + ... 
+ + uint64_t tailer; + +}; + + + +3.8 Transaction Commit Requires 4 fdatasync + +The current transaction algorithm is: + +1. write_recovery_data(); + +2. sync(); + +3. write_recovery_header(); + +4. sync(); + +5. overwrite_with_new_data(); + +6. sync(); + +7. remove_recovery_header(); + +8. sync(); + +On current ext3, each sync flushes all data to disk, so the next +3 syncs are relatively expensive. But this could become a +performance bottleneck on other filesystems such as ext4. + +3.8.1 Proposed Solution + + + + + + + + + +Neil Brown points out that this is overzealous, and only one sync +is needed: + +1. Bundle the recovery data, a transaction counter and a strong + checksum of the new data. + +2. Strong checksum that whole bundle. + +3. Store the bundle in the database. + +4. Overwrite the oldest of the two recovery pointers in the + header (identified using the transaction counter) with the + offset of this bundle. + +5. sync. + +6. Write the new data to the file. + +Checking for recovery means identifying the latest bundle with a +valid checksum and using the new data checksum to ensure that it +has been applied. This is more expensive than the current check, +but need only be done at open. For running databases, a separate +header field can be used to indicate a transaction in progress; +we need only check for recovery if this is set. + +3.9 TDB Does Not Have Snapshot Support + +3.9.1 Proposed Solution + +None. At some point you say “use a real database”. + +But as a thought experiment, if we implemented transactions to +only overwrite free entries (this is tricky: there must not be a +header in each entry which indicates whether it is free, but use +of presence in metadata elsewhere), and a pointer to the hash +table, we could create an entirely new commit without destroying +existing data. Then it would be easy to implement snapshots in a +similar way. 
+ +This would not allow arbitrary changes to the database, such as +tdb_repack does, and would require more space (since we have to +preserve the current and future entries at once). If we used hash +trees rather than one big hash table, we might only have to +rewrite some sections of the hash, too. + +We could then implement snapshots using a similar method, using +multiple different hash tables/free tables. + +3.10 Transactions Cannot Operate in Parallel + +This would be useless for ldb, as it hits the index records with +just about every update. It would add significant complexity in +resolving clashes, and cause all the transaction callers to write +their code to loop in the case where the transactions spuriously +failed. + +3.10.1 Proposed Solution + +We could solve a small part of the problem by providing read-only +transactions. These would allow one write transaction to begin, +but it could not commit until all r/o transactions are done. This +would require a new RO_TRANSACTION_LOCK, which would be upgraded +on commit. + +3.11 Default Hash Function Is Suboptimal + +The Knuth-inspired multiplicative hash used by tdb is fairly slow +(especially if we expand it to 64 bits), and works best when the +hash bucket size is a prime number (which also means a slow +modulus). In addition, it is highly predictable which could +potentially lead to a Denial of Service attack in some TDB uses. + +3.11.1 Proposed Solution + +The Jenkins lookup3 hash[footnote: +http://burtleburtle.net/bob/c/lookup3.c +] is a fast and superbly-mixing hash. It's used by the Linux +kernel and almost everything else. This has the particular +properties that it takes an initial seed, and produces two 32 bit +hash numbers, which we can combine into a 64-bit hash. + +The seed should be created at tdb-creation time from some random +source, and placed in the header. This is far from foolproof, but +adds a little bit of protection against hash bombing. 
+ +3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity + +We lock a record during traversal iteration, and try to grab that +lock in the delete code. If that grab on delete fails, we simply +mark it deleted and continue onwards; traversal checks for this +condition and does the delete when it moves off the record. + +If traversal terminates, the dead record may be left +indefinitely. + +3.12.1 Proposed Solution + +Remove reliability guarantees; see [traverse-Proposed-Solution]. + +3.13 Fcntl Locking Adds Overhead + +Placing a fcntl lock means a system call, as does removing one. +This is actually one reason why transactions can be faster +(everything is locked once at transaction start). In the +uncontended case, this overhead can theoretically be eliminated. + +3.13.1 Proposed Solution + +None. + +We tried this before with spinlock support, in the early days of +TDB, and it didn't make much difference except in manufactured +benchmarks. + +We could use spinlocks (with futex kernel support under Linux), +but it means that we lose automatic cleanup when a process dies +with a lock. There is a method of auto-cleanup under Linux, but +it's not supported by other operating systems. We could +reintroduce a clear-if-first-style lock and sweep for dead +futexes on open, but that wouldn't help the normal case of one +concurrent opener dying. Increasingly elaborate repair schemes +could be considered, but they require an ABI change (everyone +must use them) anyway, so there's no need to do this at the same +time as everything else. diff --git a/lib/tdb2/doc/design.lyx b/lib/tdb2/doc/design.lyx new file mode 100644 index 0000000000..0a1d6a14bc --- /dev/null +++ b/lib/tdb2/doc/design.lyx @@ -0,0 +1,2689 @@ +#LyX 1.6.7 created this file. 
For more info see http://www.lyx.org/ +\lyxformat 345 +\begin_document +\begin_header +\textclass article +\use_default_options true +\language english +\inputencoding auto +\font_roman default +\font_sans default +\font_typewriter default +\font_default_family default +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_amsmath 1 +\use_esint 1 +\cite_engine basic +\use_bibtopic false +\paperorientation portrait +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\defskip medskip +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes true +\output_changes true +\author "" +\author "" +\end_header + +\begin_body + +\begin_layout Title +TDB2: Redesigning The Trivial DataBase +\end_layout + +\begin_layout Author +Rusty Russell, IBM Corporation +\end_layout + +\begin_layout Date +17-March-2011 +\end_layout + +\begin_layout Abstract +The Trivial DataBase on-disk format is 32 bits; with usage cases heading + towards the 4G limit, that must change. + This required breakage provides an opportunity to revisit TDB's other design + decisions and reassess them. +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The Trivial DataBase was originally written by Andrew Tridgell as a simple + key/data pair storage system with the same API as dbm, but allowing multiple + readers and writers while being small enough (< 1000 lines of C) to include + in SAMBA. + The simple design created in 1999 has proven surprisingly robust and performant +, used in Samba versions 3 and 4 as well as numerous other projects. + Its useful life was greatly increased by the (backwards-compatible!) addition + of transaction support in 2005. 
+\end_layout + +\begin_layout Standard +The wider variety and greater demands of TDB-using code have led to some + organic growth of the API, as well as some compromises on the implementation. + None of these, by themselves, are seen as show-stoppers, but the cumulative + effect is a loss of elegance over the initial, simple TDB implementation. + Here is a table of the approximate number of lines of implementation code + and number of API functions at the end of each year: +\end_layout + +\begin_layout Standard +\begin_inset Tabular +<lyxtabular version="3" rows="12" columns="3"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Year End +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +API Functions +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Lines of C Code Implementation +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1999 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +13 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1195 +\end_layout + +\end_inset +</cell> 
+</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2000 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +24 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1725 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2001 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +32 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2228 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2002 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2481 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2003 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> 
+\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2552 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2004 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +40 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2584 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2005 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +38 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2647 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2006 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +52 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +3754 +\end_layout + +\end_inset +</cell> 
+</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2007 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +66 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +4398 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2008 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +71 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +4768 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2009 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +73 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +5715 +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Standard +This review is an attempt to catalog and address all the known issues with + TDB and create solutions which address the problems without significantly + 
increasing complexity; all involved are far too aware of the dangers of + second system syndrome in rewriting a successful project like this. +\end_layout + +\begin_layout Section +API Issues +\end_layout + +\begin_layout Subsection +tdb_open_ex Is Not Expandable +\end_layout + +\begin_layout Standard +The tdb_open() call was expanded to tdb_open_ex(), which added an optional + hashing function and an optional logging function argument. + Additional arguments to open would require the introduction of a tdb_open_ex2 + call etc. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "attributes" + +\end_inset + + +\end_layout + +\begin_layout Standard +tdb_open() will take a linked-list of attributes: +\end_layout + +\begin_layout LyX-Code +enum tdb_attribute { +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_LOG = 0, +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_HASH = 1 +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_base { +\end_layout + +\begin_layout LyX-Code + enum tdb_attribute attr; +\end_layout + +\begin_layout LyX-Code + union tdb_attribute *next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_log { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ +\end_layout + +\begin_layout LyX-Code + tdb_log_func log_fn; +\end_layout + +\begin_layout LyX-Code + void *log_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_hash { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ +\end_layout + +\begin_layout LyX-Code + tdb_hash_func hash_fn; +\end_layout + +\begin_layout LyX-Code + void *hash_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +union tdb_attribute { 
+\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_log log; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_hash hash; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +This allows future attributes to be added, even if this expands the size + of the union. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_traverse Makes Impossible Guarantees +\end_layout + +\begin_layout Standard +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it + was thought that it was important to guarantee that all records which exist + at the start and end of the traversal would be included, and no record + would be included twice. +\end_layout + +\begin_layout Standard +This adds complexity (see +\begin_inset CommandInset ref +LatexCommand ref +reference "Reliable-Traversal-Adds" + +\end_inset + +) and does not work anyway for records which are altered (in particular, + those which are expanded may be effectively deleted and re-added behind + the traversal). +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "traverse-Proposed-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Abandon the guarantee. + You will see every record if no changes occur during your traversal, otherwise + you will see some subset. + You can prevent changes by using a transaction or the locking API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + Delete-during-traverse will still delete every record, too (assuming no + other changes). +\end_layout + +\begin_layout Subsection +Nesting of Transactions Is Fraught +\end_layout + +\begin_layout Standard +TDB has alternated between allowing nested transactions and not allowing + them. 
+ Various paths in the Samba codebase assume that transactions will nest, + and in a sense they can: the operation is only committed to disk when the + outer transaction is committed. + There are two problems, however: +\end_layout + +\begin_layout Enumerate +Canceling the inner transaction will cause the outer transaction commit + to fail, and will not undo any operations since the inner transaction began. + This problem is soluble with some additional internal code. +\end_layout + +\begin_layout Enumerate +An inner transaction commit can be cancelled by the outer transaction. + This is desirable in the way which Samba's database initialization code + uses transactions, but could be a surprise to any users expecting a successful + transaction commit to expose changes to others. +\end_layout + +\begin_layout Standard +The current solution is to specify the behavior at tdb_open(), with the + default currently that nested transactions are allowed. + This flag can also be changed at runtime. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Given the usage patterns, it seems that the +\begin_inset Quotes eld +\end_inset + +least-surprise +\begin_inset Quotes erd +\end_inset + + behavior of disallowing nested transactions should become the default. + Additionally, it seems the outer transaction is the only code which knows + whether inner transactions should be allowed, so a flag to indicate this + could be added to tdb_transaction_start. + However, this behavior can be simulated with a wrapper which uses tdb_add_flags +() and tdb_remove_flags(), so the API should not be expanded for this relatively +-obscure case. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete; the nesting flag has been removed. 
+\end_layout + +\begin_layout Subsection +Incorrect Hash Function is Not Detected +\end_layout + +\begin_layout Standard +tdb_open_ex() allows the calling code to specify a different hash function + to use, but does not check that all other processes accessing this tdb + are using the same hash function. + The result is that records are missing from tdb_fetch(). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain an example hash result (eg. + the hash of 0xdeadbeef), and tdb_open_ex() should check that the given + hash function produces the same answer, or fail the tdb_open call. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_set_max_dead/TDB_VOLATILE Expose Implementation +\end_layout + +\begin_layout Standard +In response to scalability issues with the free list ( +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Freelist-Is" + +\end_inset + +) two API workarounds have been incorporated in TDB: tdb_set_max_dead() + and the TDB_VOLATILE flag to tdb_open. + The latter actually calls the former with an argument of +\begin_inset Quotes eld +\end_inset + +5 +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +This code allows deleted records to accumulate without putting them in the + free list. + On delete we iterate through each chain and free them in a batch if there + are more than max_dead entries. + These are never otherwise recycled except as a side-effect of a tdb_repack. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With the scalability problems of the freelist solved, this API can be removed. 
+ The TDB_VOLATILE flag may still be useful as a hint that store and delete + of records will be at least as common as fetch in order to allow some internal + tuning, but initially will become a no-op. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + Unknown flags cause tdb_open() to fail as well, so they can be detected + at runtime. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Files-Cannot" + +\end_inset + +TDB Files Cannot Be Opened Multiple Times In The Same Process +\end_layout + +\begin_layout Standard +No process can open the same TDB twice; we check and disallow it. + This is an unfortunate side-effect of fcntl locks, which operate on a per-file + rather than per-file-descriptor basis, and do not nest. + Thus, closing any file descriptor on a file clears all the locks obtained + by this process, even if they were placed using a different file descriptor! +\end_layout + +\begin_layout Standard +Note that even if this were solved, deadlock could occur if operations were + nested: this is a more manageable programming error in most cases. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could lobby POSIX to fix the perverse rules, or at least lobby Linux + to violate them so that the most common implementation does not have this + restriction. + This would be a generally good idea for other fcntl lock users. +\end_layout + +\begin_layout Standard +Samba uses a wrapper which hands out the same tdb_context to multiple callers + if this happens, and does simple reference counting. + We should do this inside the tdb library, which already emulates lock nesting + internally; it would need to recognize when deadlock occurs within a single + process. 
+ This would create a new failure mode for tdb operations (while we currently + handle locking failures, they are impossible in normal use and a process + encountering them can do little but give up). +\end_layout + +\begin_layout Standard +I do not see benefit in an additional tdb_open flag to indicate whether + re-opening is allowed, though there may be some benefit to adding a + call to detect when a tdb_context is shared, to allow others to create such + an API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB API Is Not POSIX Thread-safe +\end_layout + +\begin_layout Standard +The TDB API uses an error code which can be queried after an operation to + determine what went wrong. + This programming model does not work with threads, unless specific additional + guarantees are given by the implementation. + In addition, even otherwise-independent threads cannot open the same TDB + (as in +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Files-Cannot" + +\end_inset + +). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Rearchitecting the API to include a tdb_errcode pointer would be a great + deal of churn, but fortunately most functions return 0 on success and -1 + on error: we can change these to return 0 on success and a negative error + code on error, and the API remains similar to previous. + The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA + pointer and return an error code. + It is also simpler to have tdb_nextkey replace its key argument in place, + freeing up any old .dptr. +\end_layout + +\begin_layout Standard +Internal locking is required to make sure that fcntl locks do not overlap + between threads, and also that the global list of tdbs is maintained. 
+\end_layout + +\begin_layout Standard +The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe + version of the library, and otherwise no overhead will exist. + Alternatively, a hooking mechanism similar to that proposed for +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + could be used to enable pthread locking at runtime. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete; API has been changed but thread safety has not been implemented. +\end_layout + +\begin_layout Subsection +*_nonblock Functions And *_mark Functions Expose Implementation +\end_layout + +\begin_layout Standard +CTDB +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Clustered TDB, see http://ctdb.samba.org +\end_layout + +\end_inset + + wishes to operate on TDB in a non-blocking manner. + This is currently done as follows: +\end_layout + +\begin_layout Enumerate +Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). + If this fails: +\end_layout + +\begin_layout Enumerate +Fork a child process, and wait for it to call the normal variant (eg. + tdb_lockall). +\end_layout + +\begin_layout Enumerate +If the child succeeds, call the _mark variant to indicate we already have + the locks (eg. + tdb_lockall_mark). +\end_layout + +\begin_layout Enumerate +Upon completion, tell the child to release the locks (eg. + tdb_unlockall). +\end_layout + +\begin_layout Enumerate +Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). +\end_layout + +\begin_layout Standard +There are several issues with this approach. + Firstly, adding two new variants of each function clutters the API for + an obscure use, and so not all functions have three variants. 
+ Secondly, it assumes that all paths of the functions ask for the same locks, + otherwise the parent process will have to get a lock which the child doesn't + have under some circumstances. + I don't believe this is currently the case, but it constrains the implementatio +n. + +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "Proposed-Solution-locking-hook" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Implement a hook for locking methods, so that the caller can control the + calls to create and remove fcntl locks. + In this scenario, ctdbd would operate as follows: +\end_layout + +\begin_layout Enumerate +Call the normal API function, eg tdb_lockall(). +\end_layout + +\begin_layout Enumerate +When the lock callback comes in, check if the child has the lock. + Initially, this is always false. + If so, return 0. + Otherwise, try to obtain it in non-blocking mode. + If that fails, return EWOULDBLOCK. +\end_layout + +\begin_layout Enumerate +Release locks in the unlock callback as normal. +\end_layout + +\begin_layout Enumerate +If tdb_lockall() fails, see if we recorded a lock failure; if so, call the + child to repeat the operation. +\end_layout + +\begin_layout Enumerate +The child records what locks it obtains, and returns that information to + the parent. +\end_layout + +\begin_layout Enumerate +When the child has succeeded, goto 1. +\end_layout + +\begin_layout Standard +This is flexible enough to handle any potential locking scenario, even when + lock requirements change. + It can be optimized so that the parent does not release locks, just tells + the child which locks it doesn't need to obtain. +\end_layout + +\begin_layout Standard +It also keeps the complexity out of the API, and in ctdbd where it is needed. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. 
+\end_layout + +\begin_layout Subsection +tdb_chainlock Functions Expose Implementation +\end_layout + +\begin_layout Standard +tdb_chainlock locks some number of records, including the record indicated + by the given key. + This gave atomicity guarantees; no-one can start a transaction, alter, + read or delete that key while the lock is held. +\end_layout + +\begin_layout Standard +It also makes the same guarantee for any other key in the chain, which is + an internal implementation detail and potentially a cause for deadlock. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + It would be nice to have an explicit single entry lock which affected no + other keys. + Unfortunately, this won't work for an entry which doesn't exist. + Thus while chainlock may be implemented more efficiently for the existing + case, it will still have overlap issues with the non-existing case. + So it is best to keep the current (lack of) guarantee about which records + will be affected to avoid constraining our implementation. +\end_layout + +\begin_layout Subsection +Signal Handling is Not Race-Free +\end_layout + +\begin_layout Standard +The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate + that the tdb locking code should return with a failure, rather than trying + again when a signal is received (and errno == EAGAIN). + This is usually used to implement timeouts. +\end_layout + +\begin_layout Standard +Unfortunately, this does not work in the case where the signal is received + before the tdb code enters the fcntl() call to place the lock: the code + will sleep within the fcntl() code, unaware that the signal wants it to + exit. + In the case of long timeouts, this does not happen in practice. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The locking hooks proposed in +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + would allow the user to decide on whether to fail the lock acquisition + on a signal. + This allows the caller to choose their own compromise: they could narrow + the race by checking immediately before the fcntl call. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +It may be possible to make this race-free in some implementations by having + the signal handler alter the struct flock to make it invalid. + This will cause the fcntl() lock call to fail with EINVAL if the signal + occurs before the kernel is entered, otherwise EAGAIN. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +The API Uses Gratuitous Typedefs, Capitals +\end_layout + +\begin_layout Standard +typedefs are useful for providing source compatibility when types can differ + across implementations, or arguably in the case of function pointer definitions + which are hard for humans to parse. + Otherwise it is simply obfuscation and pollutes the namespace. +\end_layout + +\begin_layout Standard +Capitalization is usually reserved for compile-time constants and macros. +\end_layout + +\begin_layout Description +TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the + definition isn't visible to the API user anyway. +\end_layout + +\begin_layout Description +TDB_DATA There is no reason to use this over struct TDB_DATA; the struct + needs to be understood by the API user. +\end_layout + +\begin_layout Description +struct +\begin_inset space ~ +\end_inset + +TDB_DATA This would normally be called 'struct tdb_data'. 
+\end_layout + +\begin_layout Description +enum +\begin_inset space ~ +\end_inset + +TDB_ERROR Similarly, this would normally be enum tdb_error. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + Introducing lower case variants would please pedants like myself, but if + it were done the existing ones should be kept. + There is little point forcing a purely cosmetic change upon tdb users. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "tdb_log_func-Doesnt-Take" + +\end_inset + +tdb_log_func Doesn't Take The Private Pointer +\end_layout + +\begin_layout Standard +For API compatibility reasons, the logging function needs to call tdb_get_loggin +g_private() to retrieve the pointer registered by the tdb_open_ex for logging. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +It should simply take an extra argument, since we are prepared to break + the API/ABI. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Various Callback Functions Are Not Typesafe +\end_layout + +\begin_layout Standard +The callback functions in tdb_set_logging_function (after +\begin_inset CommandInset ref +LatexCommand ref +reference "tdb_log_func-Doesnt-Take" + +\end_inset + + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check + all take void * and must internally convert it to the argument type they + were expecting. +\end_layout + +\begin_layout Standard +If this type changes, the compiler will not produce warnings on the callers, + since it only sees void *. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With careful use of macros, we can create callback functions which give + a warning when used on gcc and the types of the callback and its private + argument differ. + Unsupported compilers will not give a warning, which is no worse than now. + In addition, the callbacks become clearer, as they need not use void * + for their parameter. +\end_layout + +\begin_layout Standard +See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic +\end_layout + +\begin_layout Standard +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should + be cleared if the caller discovers it is the only process with the TDB + open. + However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not + be detected, so will have the TDB erased underneath them (usually resulting + in a crash). +\end_layout + +\begin_layout Standard +There is a similar issue on fork(); if the parent exits (or otherwise closes + the tdb) before the child calls tdb_reopen_all() to establish the lock + used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener + at that moment will believe it alone has opened the TDB and will erase + it. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove TDB_CLEAR_IF_FIRST. + Other workarounds are possible, but see +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. 
+\end_layout + +\begin_layout Subsection +Extending The Header Is Difficult +\end_layout + +\begin_layout Standard +We have reserved (zeroed) words in the TDB header, which can be used for + future features. + If the future features are compulsory, the version number must be updated + to prevent old code from accessing the database. + But if the future feature is optional, we have no way of telling if older + code is accessing the database or not. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain a +\begin_inset Quotes eld +\end_inset + +format variant +\begin_inset Quotes erd +\end_inset + + value (64-bit). + This is divided into two 32-bit parts: +\end_layout + +\begin_layout Enumerate +The lower part reflects the format variant understood by code accessing + the database. +\end_layout + +\begin_layout Enumerate +The upper part reflects the format variant you must understand to write + to the database (otherwise you can only open for reading). +\end_layout + +\begin_layout Standard +The latter field can only be written at creation time, the former should + be written under the OPEN_LOCK when opening the database for writing, if + the variant of the code is lower than the current lowest variant. +\end_layout + +\begin_layout Standard +This should allow backwards-compatible features to be added, and detection + if older code (which doesn't understand the feature) writes to the database. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Record Headers Are Not Expandable +\end_layout + +\begin_layout Standard +If we later want to add (say) checksums on keys and data, it would require + another format change, which we'd like to avoid. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We often have extra padding at the tail of a record.
+ If we ensure that the first byte (if any) of this padding is zero, we will + have a way for future changes to detect code which doesn't understand a + new format: the new code would write (say) a 1 at the tail, and thus if + there is no tail or the first byte is 0, we would know the extension is + not present on that record. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Does Not Use Talloc +\end_layout + +\begin_layout Standard +Many users of TDB (particularly Samba) use the talloc allocator, and thus + have to wrap TDB in a talloc context to use it conveniently. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The allocation within TDB is not complicated enough to justify the use of + talloc, and I am reluctant to force another (excellent) library on TDB + users. + Nonetheless a compromise is possible. + An attribute (see +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) can be added later to tdb_open() to provide an alternate allocation mechanism, + specifically for talloc but usable by any other allocator (which would + ignore the +\begin_inset Quotes eld +\end_inset + +context +\begin_inset Quotes erd +\end_inset + + argument). +\end_layout + +\begin_layout Standard +This would form a talloc hierarchy as expected, but the caller would still + have to attach a destructor to the tdb context returned from tdb_open to + close it. + All TDB_DATA fields would be children of the tdb_context, and the caller + would still have to manage them (using talloc_free() or talloc_steal()). +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred.
+\end_layout + +\begin_layout Section +Performance And Scalability Issues +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +TDB_CLEAR_IF_FIRST Imposes Performance Penalty +\end_layout + +\begin_layout Standard +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset + 4 (aka. + the ACTIVE_LOCK). + While these locks never conflict in normal tdb usage, they do add substantial + overhead for most fcntl lock implementations when the kernel scans to detect + if a lock conflict exists. + This is often a single linked list, making the time to acquire and release + a fcntl lock O(N) where N is the number of processes with the TDB open, + not the number actually doing work. +\end_layout + +\begin_layout Standard +In a Samba server it is common to have huge numbers of clients sitting idle, + and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +There is a flag to tdb_reopen_all() which is used for this optimization: + if the parent process will outlive the child, the child does not need the + ACTIVE_LOCK. + This is a workaround for this very performance issue. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove the flag. + It was a neat idea, but even trivial servers tend to know when they are + initializing for the first time and can simply unlink the old tdb at that + point. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Files Have a 4G Limit +\end_layout + +\begin_layout Standard +This seems to be becoming an issue (so much for +\begin_inset Quotes eld +\end_inset + +trivial +\begin_inset Quotes erd +\end_inset + +!), particularly for ldb. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +A new, incompatible TDB format which uses 64 bit offsets internally rather + than 32 bit as now. + For simplicity of endian conversion (which TDB does on the fly if required), + all values will be 64 bit on disk. + In practice, some upper bits may be used for other purposes, but at least + 56 bits will be available for file offsets. +\end_layout + +\begin_layout Standard +tdb_open() will automatically detect the old version, and even create them + if TDB_VERSION6 is specified to tdb_open. +\end_layout + +\begin_layout Standard +32 bit processes will still be able to access TDBs larger than 4G (assuming + that their off_t allows them to seek to 64 bits), they will gracefully + fall back as they fail to mmap. + This can happen already with large TDBs. +\end_layout + +\begin_layout Standard +Old versions of tdb will fail to open the new TDB files (since 28 August + 2009, commit 398d0c29290: prior to that any unrecognized file format would + be erased and initialized as a fresh tdb!) +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Records Have a 4G Limit +\end_layout + +\begin_layout Standard +This has not been a reported problem, and the API uses size_t which can + be 64 bit on 64 bit platforms. + However, other limits may have made such an issue moot. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Record sizes will be 64 bit, with an error returned on 32 bit platforms + which try to access such records (the current implementation would return + TDB_ERR_OOM in a similar case). + It seems unlikely that 32 bit keys will be a limitation, so the implementation + may not support this (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +). 
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Hash Size Is Determined At TDB Creation Time +\end_layout + +\begin_layout Standard +TDB contains a number of hash chains in the header; the number is specified + at creation time, and defaults to 131. + This is such a bottleneck on large databases (as each hash chain gets quite + long), that LDB uses 10,000 for this hash. + In general it is impossible to know what the 'right' answer is at database + creation time. +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Hash-Size-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +After comprehensive performance testing on various scalable hash variants +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying + because I was previously convinced that an expanding tree of hashes would + be very close to optimal. +\end_layout + +\end_inset + +, it became clear that it is hard to beat a straight linear hash table which + doubles in size when it reaches saturation. + Unfortunately, altering the hash table introduces serious locking complications +: the entire hash table needs to be locked to enlarge the hash table, and + others might be holding locks. + Particularly insidious are insertions done under tdb_chainlock. +\end_layout + +\begin_layout Standard +Thus an expanding layered hash will be used: an array of hash groups, with + each hash group exploding into pointers to lower hash groups once it fills, + turning into a hash tree. + This has implications for locking: we must lock the entire group in case + we need to expand it, yet we don't know how deep the tree is at that point. 
+\end_layout + +\begin_layout Standard +Note that bits from the hash table entries should be stolen to hold more + hash bits to reduce the penalty of collisions. + We can use the otherwise-unused lower 3 bits. + If we limit the size of the database to 64 exabytes, we can use the top + 8 bits of the hash entry as well. + These 11 bits would reduce false positives down to 1 in 2000 which is more + than we need: we can use one of the bits to indicate that the extra hash + bits are valid. + This means we can choose not to re-hash all entries when we expand a hash + group; simply use the next bits we need and mark them invalid. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Freelist-Is" + +\end_inset + +TDB Freelist Is Highly Contended +\end_layout + +\begin_layout Standard +TDB uses a single linked list for the free list. + Allocation occurs as follows, using heuristics which have evolved over + time: +\end_layout + +\begin_layout Enumerate +Get the free list lock for this whole operation. +\end_layout + +\begin_layout Enumerate +Multiply length by 1.25, so we always over-allocate by 25%. +\end_layout + +\begin_layout Enumerate +Set the slack multiplier to 1. +\end_layout + +\begin_layout Enumerate +Examine the current freelist entry: if it is > length but < the current + best case, remember it as the best case. +\end_layout + +\begin_layout Enumerate +Multiply the slack multiplier by 1.05. +\end_layout + +\begin_layout Enumerate +If our best fit so far is less than length * slack multiplier, return it. + The slack will be turned into a new free record if it's large enough. +\end_layout + +\begin_layout Enumerate +Otherwise, go onto the next freelist entry. 
+\end_layout + +\begin_layout Standard +Deleting a record occurs as follows: +\end_layout + +\begin_layout Enumerate +Lock the hash chain for this whole operation. +\end_layout + +\begin_layout Enumerate +Walk the chain to find the record, keeping the prev pointer offset. +\end_layout + +\begin_layout Enumerate +If max_dead is non-zero: +\end_layout + +\begin_deeper +\begin_layout Enumerate +Walk the hash chain again and count the dead records. +\end_layout + +\begin_layout Enumerate +If it's more than max_dead, bulk free all the dead ones (similar to steps + 4 and below, but the lock is only obtained once). +\end_layout + +\begin_layout Enumerate +Simply mark this record as dead and return. + +\end_layout + +\end_deeper +\begin_layout Enumerate +Get the free list lock for the remainder of this operation. +\end_layout + +\begin_layout Enumerate +\begin_inset CommandInset label +LatexCommand label +name "right-merging" + +\end_inset + +Examine the following block to see if it is free; if so, enlarge the current + block and remove that block from the free list. + This was disabled, as removal from the free list was O(entries-in-free-list). +\end_layout + +\begin_layout Enumerate +Examine the preceding block to see if it is free: for this reason, each + block has a 32-bit tailer which indicates its length. + If it is free, expand it to cover our new block and return. +\end_layout + +\begin_layout Enumerate +Otherwise, prepend ourselves to the free list. +\end_layout + +\begin_layout Standard +Disabling right-merging (step +\begin_inset CommandInset ref +LatexCommand ref +reference "right-merging" + +\end_inset + +) causes fragmentation; the other heuristics proved insufficient to address + this, so the final answer to this was that when we expand the TDB file + inside a transaction commit, we repack the entire tdb.
+\end_layout + +\begin_layout Standard +The single list lock limits our allocation rate; due to the other issues + this is not currently seen as a bottleneck. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +\end_layout + +\begin_layout Standard +The free list must be split to reduce contention. + Assuming perfect free merging, we can at most have 1 free list entry for + each entry. + This implies that the number of free lists is related to the size of the + hash table, but as it is rare to walk a large number of free list entries + we can use far fewer, say 1/32 of the number of hash buckets. +\end_layout + +\begin_layout Standard +It seems tempting to try to reuse the hash implementation which we use for + records here, but we have two ways of searching for free entries: for allocatio +n we search by size (and possibly zone) which produces too many clashes + for our hash table to handle well, and for coalescing we search by address. + Thus an array of doubly-linked free lists seems preferable. +\end_layout + +\begin_layout Standard +There are various benefits in using per-size free lists (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +) but it's not clear this would reduce contention in the common case where + all processes are allocating/freeing the same size. + Thus we almost certainly need to divide in other ways: the most obvious + is to divide the file into zones, and using a free list (or table of free + lists) for each. + This approximates address ordering. 
+\end_layout + +\begin_layout Standard +Unfortunately it is difficult to know what heuristics should be used to + determine zone sizes, and our transaction code relies on being able to + create a +\begin_inset Quotes eld +\end_inset + +recovery area +\begin_inset Quotes erd +\end_inset + + by simply appending to the file (difficult if it would need to create a + new zone header). + Thus we use a linked-list of free tables; currently we only ever create + one, but if there is more than one we choose one at random to use. + In future we may use heuristics to add new free tables on contention. + We only expand the file when all free tables are exhausted. +\end_layout + +\begin_layout Standard +The basic algorithm is as follows. + Freeing is simple: +\end_layout + +\begin_layout Enumerate +Identify the correct free list. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the list (we didn't have a lock, sizes could have changed): relock + if necessary. +\end_layout + +\begin_layout Enumerate +Place the freed entry in the list. +\end_layout + +\begin_layout Standard +Allocation is a little more complicated, as we perform delayed coalescing + at this point: +\end_layout + +\begin_layout Enumerate +Pick a free table; usually the previous one. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +If the top entry is large enough, remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, coalesce entries in the list. If there was no entry large enough, + unlock the list and try the next largest list. +\end_layout + +\begin_layout Enumerate +If no list has an entry which meets our needs, try the next free table. +\end_layout + +\begin_layout Enumerate +If no free table satisfies, expand the file.
+\end_layout + +\begin_layout Standard +This optimizes rapid insert/delete of free list entries by not coalescing + them all the time. + First-fit address ordering seems to be fairly good for keeping + fragmentation low (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +). + Note that address ordering does not need a tailer to coalesce, though if + we needed one we could have one cheaply: see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +. + +\end_layout + +\begin_layout Standard +Each free entry has the free table number in the header: less than 255. + It also contains a doubly-linked list for easy deletion. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Becomes-Fragmented" + +\end_inset + +TDB Becomes Fragmented +\end_layout + +\begin_layout Standard +Much of this is a result of allocation strategy +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute +xas.edu/pub/garbage/malloc/ismm98.ps +\end_layout + +\end_inset + + and deliberate hobbling of coalescing; internal fragmentation (aka overallocati +on) is deliberately set at 25%, and external fragmentation is only cured + by the decision to repack the entire db when a transaction commit needs + to enlarge the file. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The 25% overhead on allocation works in practice for ldb because indexes + tend to expand by one record at a time. + This internal fragmentation can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them.
+\end_layout + +\begin_layout Standard +There is a spectrum of possible solutions for external fragmentation: + one is to use a fragmentation-avoiding allocation strategy such as best-fit + address-order allocator. + The other end of the spectrum would be to use a bump allocator (very fast + and simple) and simply repack the file when we reach the end. +\end_layout + +\begin_layout Standard +There are three problems with efficient fragmentation-avoiding allocators: + they are non-trivial, they tend to use a single free list for each size, + and there's no evidence that tdb allocation patterns will match those recorded + for general allocators (though it seems likely). +\end_layout + +\begin_layout Standard +Thus we don't spend too much effort on external fragmentation; we will be + no worse than the current code if we need to repack on occasion. + More effort is spent on reducing freelist contention, and reducing overhead. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Records-Incur-A" + +\end_inset + +Records Incur A 28-Byte Overhead +\end_layout + +\begin_layout Standard +Each TDB record has a header as follows: +\end_layout + +\begin_layout LyX-Code +struct tdb_record { +\end_layout + +\begin_layout LyX-Code + tdb_off_t next; /* offset of the next record in the list */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t rec_len; /* total byte length of record */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t key_len; /* byte length of key */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t data_len; /* byte length of data */ +\end_layout + +\begin_layout LyX-Code + uint32_t full_hash; /* the full 32 bit hash of the key */ +\end_layout + +\begin_layout LyX-Code + uint32_t magic; /* try to catch errors */ +\end_layout + +\begin_layout LyX-Code + /* the following union is implied: +\end_layout + +\begin_layout LyX-Code + union { +\end_layout + +\begin_layout LyX-Code + char record[rec_len]; 
+\end_layout + +\begin_layout LyX-Code + struct { +\end_layout + +\begin_layout LyX-Code + char key[key_len]; +\end_layout + +\begin_layout LyX-Code + char data[data_len]; +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + uint32_t totalsize; (tailer) +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + */ +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Naively, this would double to a 56-byte overhead on a 64 bit implementation. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We can use various techniques to reduce this for an allocated block: +\end_layout + +\begin_layout Enumerate +The 'next' pointer is not required, as we are using a flat hash table. +\end_layout + +\begin_layout Enumerate +'rec_len' can instead be expressed as an addition to key_len and data_len + (it accounts for wasted or overallocated length in the record). + Since the record length is always a multiple of 8, we can conveniently + fit it in 32 bits (representing up to 35 bits). +\end_layout + +\begin_layout Enumerate +'key_len' and 'data_len' can be reduced. + I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine + the two into one 64-bit field and using a 5 bit value which indicates at + what bit to divide the two. + Keys are unlikely to scale as fast as data, so I'm assuming a maximum key + size of 32 bits. +\end_layout + +\begin_layout Enumerate +'full_hash' is used to avoid a memcmp on the +\begin_inset Quotes eld +\end_inset + +miss +\begin_inset Quotes erd +\end_inset + + case, but this is diminishing returns after a handful of bits (at 10 bits, + it reduces 99.9% of false memcmp). + As an aside, as the lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. 
+ Note that it's not clear that these bits will be a win, given the extra + bits in the hash table itself (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Hash-Size-Solution" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate +'magic' does not need to be enlarged: it currently reflects one of 5 values + (used, free, dead, recovery, and unused_recovery). + It is useful for quick sanity checking however, and should not be eliminated. +\end_layout + +\begin_layout Enumerate +'tailer' is only used to coalesce free blocks (so a block to the right can + find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of the following + block (and the tailer only exists in free blocks). +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +This technique from Thomas Standish. + Data Structure Techniques. + Addison-Wesley, Reading, Massachusetts, 1980. +\end_layout + +\end_inset + + The current proposed coalescing algorithm doesn't need this, however. 
+\end_layout + +\begin_layout Standard +This produces a 16 byte used header like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_used_record { +\end_layout + +\begin_layout LyX-Code + uint32_t used_magic : 16, +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + key_data_divide: 5, +\end_layout + +\begin_layout LyX-Code + top_hash: 11; +\end_layout + +\begin_layout LyX-Code + uint32_t extra_octets; +\end_layout + +\begin_layout LyX-Code + uint64_t key_and_data_len; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +And a free record like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_free_record { +\end_layout + +\begin_layout LyX-Code + uint64_t free_magic: 8, +\end_layout + +\begin_layout LyX-Code + prev : 56; +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + uint64_t free_table: 8, +\end_layout + +\begin_layout LyX-Code + total_length : 56; +\end_layout + +\begin_layout LyX-Code + uint64_t next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Note that by limiting valid offsets to 56 bits, we can pack everything we + need into 3 64-bit words, meaning our minimum record size is 8 bytes. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete.
+\end_layout + +\begin_layout Subsection +Transaction Commit Requires 4 fdatasync +\end_layout + +\begin_layout Standard +The current transaction algorithm is: +\end_layout + +\begin_layout Enumerate +write_recovery_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +write_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +overwrite_with_new_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +remove_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Standard +On current ext3, each sync flushes all data to disk, so the next 3 syncs + are relatively expensive. + But this could become a performance bottleneck on other filesystems such + as ext4. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Neil Brown points out that this is overzealous, and only one sync is needed: +\end_layout + +\begin_layout Enumerate +Bundle the recovery data, a transaction counter and a strong checksum of + the new data. +\end_layout + +\begin_layout Enumerate +Strong checksum that whole bundle. +\end_layout + +\begin_layout Enumerate +Store the bundle in the database. +\end_layout + +\begin_layout Enumerate +Overwrite the oldest of the two recovery pointers in the header (identified + using the transaction counter) with the offset of this bundle. +\end_layout + +\begin_layout Enumerate +sync. +\end_layout + +\begin_layout Enumerate +Write the new data to the file. +\end_layout + +\begin_layout Standard +Checking for recovery means identifying the latest bundle with a valid checksum + and using the new data checksum to ensure that it has been applied. + This is more expensive than the current check, but need only be done at + open. 
+ For running databases, a separate header field can be used to indicate + a transaction in progress; we need only check for recovery if this is set. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Does-Not" + +\end_inset + +TDB Does Not Have Snapshot Support +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + + (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). +\end_layout + +\begin_layout Standard +But as a thought experiment, if we implemented transactions to only overwrite + free entries (this is tricky: there must not be a header in each entry + which indicates whether it is free, but use of presence in metadata elsewhere), + and a pointer to the hash table, we could create an entirely new commit + without destroying existing data. + Then it would be easy to implement snapshots in a similar way. +\end_layout + +\begin_layout Standard +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\end_layout + +\begin_layout Standard +We could then implement snapshots using a similar method, using multiple + different hash tables/free tables. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +Transactions Cannot Operate in Parallel +\end_layout + +\begin_layout Standard +This would be useless for ldb, as it hits the index records with just about + every update.
+ It would add significant complexity in resolving clashes, and cause + all transaction callers to write their code to loop in the case where the + transactions spuriously failed. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). + We could solve a small part of the problem by providing read-only transactions. + These would allow one write transaction to begin, but it could not commit + until all r/o transactions are done. + This would require a new RO_TRANSACTION_LOCK, which would be upgraded on + commit. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +Default Hash Function Is Suboptimal +\end_layout + +\begin_layout Standard +The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially + if we expand it to 64 bits), and works best when the hash bucket size is + a prime number (which also means a slow modulus). + In addition, it is highly predictable which could potentially lead to a + Denial of Service attack in some TDB uses. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The Jenkins lookup3 hash +\begin_inset Foot +status open + +\begin_layout Plain Layout +http://burtleburtle.net/bob/c/lookup3.c +\end_layout + +\end_inset + + is a fast and superbly-mixing hash. + It's used by the Linux kernel and almost everything else. + This has the particular properties that it takes an initial seed, and produces + two 32 bit hash numbers, which we can combine into a 64-bit hash. +\end_layout + +\begin_layout Standard +The seed should be created at tdb-creation time from some random source, + and placed in the header. + This is far from foolproof, but adds a little bit of protection against + hash bombing.
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "Reliable-Traversal-Adds" + +\end_inset + +Reliable Traversal Adds Complexity +\end_layout + +\begin_layout Standard +We lock a record during traversal iteration, and try to grab that lock in + the delete code. + If that grab on delete fails, we simply mark it deleted and continue onwards; + traversal checks for this condition and does the delete when it moves off + the record. +\end_layout + +\begin_layout Standard +If traversal terminates, the dead record may be left indefinitely. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove reliability guarantees; see +\begin_inset CommandInset ref +LatexCommand ref +reference "traverse-Proposed-Solution" + +\end_inset + +. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Fcntl Locking Adds Overhead +\end_layout + +\begin_layout Standard +Placing a fcntl lock means a system call, as does removing one. + This is actually one reason why transactions can be faster (everything + is locked once at transaction start). + In the uncontended case, this overhead can theoretically be eliminated. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +We tried this before with spinlock support, in the early days of TDB, and + it didn't make much difference except in manufactured benchmarks. +\end_layout + +\begin_layout Standard +We could use spinlocks (with futex kernel support under Linux), but it means + that we lose automatic cleanup when a process dies with a lock. + There is a method of auto-cleanup under Linux, but it's not supported by + other operating systems. 
+ We could reintroduce a clear-if-first-style lock and sweep for dead futexes + on open, but that wouldn't help the normal case of one concurrent opener + dying. + Increasingly elaborate repair schemes could be considered, but they require + an ABI change (everyone must use them) anyway, so there's no need to do + this at the same time as everything else. +\end_layout + +\begin_layout Subsection +Some Transactions Don't Require Durability +\end_layout + +\begin_layout Standard +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) + usage, and occasionally empties the results into a transactional TDB. + This kind of usage prioritizes performance over durability: as long as + we are consistent, data can be lost. +\end_layout + +\begin_layout Standard +This would be more neatly implemented inside tdb: a +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + transaction commit (ie. + syncless) which meant that data may be reverted on a crash. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +Unfortunately any transaction scheme which overwrites old data requires + a sync before that overwrite to avoid the possibility of corruption. +\end_layout + +\begin_layout Standard +It seems possible to use a scheme similar to that described in +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Does-Not" + +\end_inset + +, where transactions are committed without overwriting existing data, and + an array of top-level pointers were available in the header. + If the transaction is +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + then we would not need a sync at all: existing processes would pick up + the new hash table and free list and work with that. 
+\end_layout + +\begin_layout Standard +At some later point, a sync would allow recovery of the old data into the + free lists (perhaps when the array of top-level pointers filled). + On crash, tdb_open() would examine the array of top levels, and apply the + transactions until it encountered an invalid checksum. +\end_layout + +\begin_layout Subsection +Tracing Is Fragile, Replay Is External +\end_layout + +\begin_layout Standard +The current TDB has compile-time-enabled tracing code, but it often breaks + as it is not enabled by default. + In a similar way, the ctdb code has an external wrapper which does replay + tracing so it can coordinate cluster-wide transactions. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "replay-attribute" + +\end_inset + + +\end_layout + +\begin_layout Standard +Tridge points out that an attribute can be later added to tdb_open (see + +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) to provide replay/trace hooks, which could become the basis for this and + future parallel transactions and snapshot support. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. 
+\end_layout + +\end_body +\end_document diff --git a/lib/tdb2/doc/design.lyx,v b/lib/tdb2/doc/design.lyx,v new file mode 100644 index 0000000000..13e6387f7f --- /dev/null +++ b/lib/tdb2/doc/design.lyx,v @@ -0,0 +1,4679 @@ +head 1.13; +access; +symbols; +locks; strict; +comment @# @; + + +1.13 +date 2011.03.01.11.46.54; author rusty; state Exp; +branches; +next 1.12; + +1.12 +date 2010.12.01.12.20.49; author rusty; state Exp; +branches; +next 1.11; + +1.11 +date 2010.12.01.11.55.20; author rusty; state Exp; +branches; +next 1.10; + +1.10 +date 2010.09.14.00.33.57; author rusty; state Exp; +branches; +next 1.9; + +1.9 +date 2010.09.09.07.25.12; author rusty; state Exp; +branches; +next 1.8; + +1.8 +date 2010.09.02.02.29.05; author rusty; state Exp; +branches; +next 1.7; + +1.7 +date 2010.09.01.10.58.12; author rusty; state Exp; +branches; +next 1.6; + +1.6 +date 2010.08.02.00.21.43; author rusty; state Exp; +branches; +next 1.5; + +1.5 +date 2010.08.02.00.21.16; author rusty; state Exp; +branches; +next 1.4; + +1.4 +date 2010.05.10.13.09.11; author rusty; state Exp; +branches; +next 1.3; + +1.3 +date 2010.05.10.11.58.37; author rusty; state Exp; +branches; +next 1.2; + +1.2 +date 2010.05.10.05.35.13; author rusty; state Exp; +branches; +next 1.1; + +1.1 +date 2010.05.04.02.29.16; author rusty; state Exp; +branches; +next ; + + +desc +@First draft +@ + + +1.13 +log +@Thread-safe API +@ +text +@#LyX 1.6.7 created this file. 
For more info see http://www.lyx.org/ +\lyxformat 345 +\begin_document +\begin_header +\textclass article +\use_default_options true +\language english +\inputencoding auto +\font_roman default +\font_sans default +\font_typewriter default +\font_default_family default +\font_sc false +\font_osf false +\font_sf_scale 100 +\font_tt_scale 100 + +\graphics default +\paperfontsize default +\use_hyperref false +\papersize default +\use_geometry false +\use_amsmath 1 +\use_esint 1 +\cite_engine basic +\use_bibtopic false +\paperorientation portrait +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\defskip medskip +\quotes_language english +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tracking_changes true +\output_changes true +\author "Rusty Russell,,," +\author "" +\end_header + +\begin_body + +\begin_layout Title +TDB2: Redesigning The Trivial DataBase +\end_layout + +\begin_layout Author +Rusty Russell, IBM Corporation +\end_layout + +\begin_layout Date +1-December-2010 +\end_layout + +\begin_layout Abstract +The Trivial DataBase on-disk format is 32 bits; with usage cases heading + towards the 4G limit, that must change. + This required breakage provides an opportunity to revisit TDB's other design + decisions and reassess them. +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The Trivial DataBase was originally written by Andrew Tridgell as a simple + key/data pair storage system with the same API as dbm, but allowing multiple + readers and writers while being small enough (< 1000 lines of C) to include + in SAMBA. + The simple design created in 1999 has proven surprisingly robust and performant +, used in Samba versions 3 and 4 as well as numerous other projects. + Its useful life was greatly increased by the (backwards-compatible!) addition + of transaction support in 2005. 
+\end_layout + +\begin_layout Standard +The wider variety and greater demands of TDB-using code has led to some + organic growth of the API, as well as some compromises on the implementation. + None of these, by themselves, are seen as show-stoppers, but the cumulative + effect is a loss of elegance over the initial, simple TDB implementation. + Here is a table of the approximate number of lines of implementation code + and number of API functions at the end of each year: +\end_layout + +\begin_layout Standard +\begin_inset Tabular +<lyxtabular version="3" rows="12" columns="3"> +<features> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<column alignment="center" valignment="top" width="0"> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Year End +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +API Functions +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +Lines of C Code Implementation +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1999 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +13 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1195 +\end_layout + +\end_inset +</cell> 
+</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2000 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +24 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +1725 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2001 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +32 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2228 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2002 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2481 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2003 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> 
+\begin_inset Text + +\begin_layout Plain Layout +35 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2552 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2004 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +40 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2584 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2005 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +38 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2647 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2006 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +52 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +3754 +\end_layout + +\end_inset +</cell> 
+</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2007 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +66 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +4398 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2008 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +71 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +4768 +\end_layout + +\end_inset +</cell> +</row> +<row> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +2009 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +73 +\end_layout + +\end_inset +</cell> +<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none"> +\begin_inset Text + +\begin_layout Plain Layout +5715 +\end_layout + +\end_inset +</cell> +</row> +</lyxtabular> + +\end_inset + + +\end_layout + +\begin_layout Standard +This review is an attempt to catalog and address all the known issues with + TDB and create solutions which address the problems without significantly + 
increasing complexity; all involved are far too aware of the dangers of + second system syndrome in rewriting a successful project like this. +\end_layout + +\begin_layout Section +API Issues +\end_layout + +\begin_layout Subsection +tdb_open_ex Is Not Expandable +\end_layout + +\begin_layout Standard +The tdb_open() call was expanded to tdb_open_ex(), which added an optional + hashing function and an optional logging function argument. + Additional arguments to open would require the introduction of a tdb_open_ex2 + call etc. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "attributes" + +\end_inset + + +\end_layout + +\begin_layout Standard +tdb_open() will take a linked-list of attributes: +\end_layout + +\begin_layout LyX-Code +enum tdb_attribute { +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_LOG = 0, +\end_layout + +\begin_layout LyX-Code + TDB_ATTRIBUTE_HASH = 1 +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_base { +\end_layout + +\begin_layout LyX-Code + enum tdb_attribute attr; +\end_layout + +\begin_layout LyX-Code + union tdb_attribute *next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_log { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ +\end_layout + +\begin_layout LyX-Code + tdb_log_func log_fn; +\end_layout + +\begin_layout LyX-Code + void *log_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +struct tdb_attribute_hash { +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ +\end_layout + +\begin_layout LyX-Code + tdb_hash_func hash_fn; +\end_layout + +\begin_layout LyX-Code + void *hash_private; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout LyX-Code +union tdb_attribute { 
+\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_base base; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_log log; +\end_layout + +\begin_layout LyX-Code + struct tdb_attribute_hash hash; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +This allows future attributes to be added, even if this expands the size + of the union. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_traverse Makes Impossible Guarantees +\end_layout + +\begin_layout Standard +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, and it + was thought that it was important to guarantee that all records which exist + at the start and end of the traversal would be included, and no record + would be included twice. +\end_layout + +\begin_layout Standard +This adds complexity (see +\begin_inset CommandInset ref +LatexCommand ref +reference "Reliable-Traversal-Adds" + +\end_inset + +) and does not work anyway for records which are altered (in particular, + those which are expanded may be effectively deleted and re-added behind + the traversal). +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "traverse-Proposed-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Abandon the guarantee. + You will see every record if no changes occur during your traversal, otherwise + you will see some subset. + You can prevent changes by using a transaction or the locking API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. + Delete-during-traverse will still delete every record, too (assuming no + other changes). +\end_layout + +\begin_layout Subsection +Nesting of Transactions Is Fraught +\end_layout + +\begin_layout Standard +TDB has alternated between allowing nested transactions and not allowing + them. 
+ Various paths in the Samba codebase assume that transactions will nest, + and in a sense they can: the operation is only committed to disk when the + outer transaction is committed. + There are two problems, however: +\end_layout + +\begin_layout Enumerate +Canceling the inner transaction will cause the outer transaction commit + to fail, and will not undo any operations since the inner transaction began. + This problem is soluble with some additional internal code. +\end_layout + +\begin_layout Enumerate +An inner transaction commit can be cancelled by the outer transaction. + This is desirable in the way which Samba's database initialization code + uses transactions, but could be a surprise to any users expecting a successful + transaction commit to expose changes to others. +\end_layout + +\begin_layout Standard +The current solution is to specify the behavior at tdb_open(), with the + default currently that nested transactions are allowed. + This flag can also be changed at runtime. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Given the usage patterns, it seems that the +\begin_inset Quotes eld +\end_inset + +least-surprise +\begin_inset Quotes erd +\end_inset + + behavior of disallowing nested transactions should become the default. + Additionally, it seems the outer transaction is the only code which knows + whether inner transactions should be allowed, so a flag to indicate this + could be added to tdb_transaction_start. + However, this behavior can be simulated with a wrapper which uses tdb_add_flags +() and tdb_remove_flags(), so the API should not be expanded for this relatively +-obscure case. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard + +\change_deleted 0 1298979572 +Incomplete; nesting flag is still defined as per tdb1. +\change_inserted 0 1298979584 +Complete; the nesting flag has been removed. 
+\change_unchanged + +\end_layout + +\begin_layout Subsection +Incorrect Hash Function is Not Detected +\end_layout + +\begin_layout Standard +tdb_open_ex() allows the calling code to specify a different hash function + to use, but does not check that all other processes accessing this tdb + are using the same hash function. + The result is that records are missing from tdb_fetch(). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain an example hash result (eg. + the hash of 0xdeadbeef), and tdb_open_ex() should check that the given + hash function produces the same answer, or fail the tdb_open call. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +tdb_set_max_dead/TDB_VOLATILE Expose Implementation +\end_layout + +\begin_layout Standard +In response to scalability issues with the free list ( +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Freelist-Is" + +\end_inset + +) two API workarounds have been incorporated in TDB: tdb_set_max_dead() + and the TDB_VOLATILE flag to tdb_open. + The latter actually calls the former with an argument of +\begin_inset Quotes eld +\end_inset + +5 +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard +This code allows deleted records to accumulate without putting them in the + free list. + On delete we iterate through each chain and free them in a batch if there + are more than max_dead entries. + These are never otherwise recycled except as a side-effect of a tdb_repack. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With the scalability problems of the freelist solved, this API can be removed. 
+ The TDB_VOLATILE flag may still be useful as a hint that store and delete + of records will be at least as common as fetch in order to allow some internal + tuning, but initially will become a no-op. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. + TDB_VOLATILE still defined, but implementation should fail on unknown flags + to be future-proof. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Files-Cannot" + +\end_inset + +TDB Files Cannot Be Opened Multiple Times In The Same Process +\end_layout + +\begin_layout Standard +No process can open the same TDB twice; we check and disallow it. + This is an unfortunate side-effect of fcntl locks, which operate on a per-file + rather than per-file-descriptor basis, and do not nest. + Thus, closing any file descriptor on a file clears all the locks obtained + by this process, even if they were placed using a different file descriptor! +\end_layout + +\begin_layout Standard +Note that even if this were solved, deadlock could occur if operations were + nested: this is a more manageable programming error in most cases. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We could lobby POSIX to fix the perverse rules, or at least lobby Linux + to violate them so that the most common implementation does not have this + restriction. + This would be a generally good idea for other fcntl lock users. +\end_layout + +\begin_layout Standard +Samba uses a wrapper which hands out the same tdb_context to multiple callers + if this happens, and does simple reference counting. + We should do this inside the tdb library, which already emulates lock nesting + internally; it would need to recognize when deadlock occurs within a single + process. 
This would create a new failure mode for tdb operations (while we currently + handle locking failures, they are impossible in normal use and a process + encountering them can do little but give up). +\end_layout + +\begin_layout Standard +I do not see benefit in an additional tdb_open flag to indicate whether + re-opening is allowed, although there may be some benefit to adding a + call to detect when a tdb_context is shared, to allow others to create such + an API. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +TDB API Is Not POSIX Thread-safe +\end_layout + +\begin_layout Standard +The TDB API uses an error code which can be queried after an operation to + determine what went wrong. + This programming model does not work with threads, unless specific additional + guarantees are given by the implementation. + In addition, even otherwise-independent threads cannot open the same TDB + (as in +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB-Files-Cannot" + +\end_inset + +). +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Rearchitecting the API to include a tdb_errcode pointer would be a great + deal of churn +\change_inserted 0 1298979557 +, but fortunately most functions return 0 on success and -1 on error: we + can change these to return 0 on success and a negative error code on error, + and the API remains similar to previous. + The tdb_fetch, tdb_firstkey and tdb_nextkey functions need to take a TDB_DATA + pointer and return an error code. + It is also simpler to have tdb_nextkey replace its key argument in place, + freeing up any old .dptr. +\end_layout + +\begin_layout Standard + +\change_deleted 0 1298979438 +; we are better to guarantee that the tdb_errcode is per-thread so the current + programming model can be maintained. 
+\end_layout + +\begin_layout Standard + +\change_deleted 0 1298979438 +This requires dynamic per-thread allocations, which is awkward with POSIX + threads (pthread_key_create space is limited and we cannot simply allocate + a key for every TDB). +\change_unchanged + +\end_layout + +\begin_layout Standard +Internal locking is required to make sure that fcntl locks do not overlap + between threads, and also that the global list of tdbs is maintained. +\end_layout + +\begin_layout Standard +The aim is that building tdb with -DTDB_PTHREAD will result in a pthread-safe + version of the library, and otherwise no overhead will exist. + Alternatively, a hooking mechanism similar to that proposed for +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + could be used to enable pthread locking at runtime. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete +\change_inserted 0 1298979681 +; API has been changed but thread safety has not been implemented. +\change_deleted 0 1298979669 +. +\change_unchanged + +\end_layout + +\begin_layout Subsection +*_nonblock Functions And *_mark Functions Expose Implementation +\end_layout + +\begin_layout Standard +CTDB +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Clustered TDB, see http://ctdb.samba.org +\end_layout + +\end_inset + + wishes to operate on TDB in a non-blocking manner. + This is currently done as follows: +\end_layout + +\begin_layout Enumerate +Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). + If this fails: +\end_layout + +\begin_layout Enumerate +Fork a child process, and wait for it to call the normal variant (eg. + tdb_lockall). +\end_layout + +\begin_layout Enumerate +If the child succeeds, call the _mark variant to indicate we already have + the locks (eg. + tdb_lockall_mark). 
+\end_layout + +\begin_layout Enumerate +Upon completion, tell the child to release the locks (eg. + tdb_unlockall). +\end_layout + +\begin_layout Enumerate +Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). +\end_layout + +\begin_layout Standard +There are several issues with this approach. + Firstly, adding two new variants of each function clutters the API for + an obscure use, and so not all functions have three variants. + Secondly, it assumes that all paths of the functions ask for the same locks, + otherwise the parent process will have to get a lock which the child doesn't + have under some circumstances. + I don't believe this is currently the case, but it constrains the implementatio +n. + +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "Proposed-Solution-locking-hook" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +Implement a hook for locking methods, so that the caller can control the + calls to create and remove fcntl locks. + In this scenario, ctdbd would operate as follows: +\end_layout + +\begin_layout Enumerate +Call the normal API function, eg tdb_lockall(). +\end_layout + +\begin_layout Enumerate +When the lock callback comes in, check if the child has the lock. + Initially, this is always false. + If so, return 0. + Otherwise, try to obtain it in non-blocking mode. + If that fails, return EWOULDBLOCK. +\end_layout + +\begin_layout Enumerate +Release locks in the unlock callback as normal. +\end_layout + +\begin_layout Enumerate +If tdb_lockall() fails, see if we recorded a lock failure; if so, call the + child to repeat the operation. +\end_layout + +\begin_layout Enumerate +The child records what locks it obtains, and returns that information to + the parent. +\end_layout + +\begin_layout Enumerate +When the child has succeeded, goto 1. 
+\end_layout + +\begin_layout Standard +This is flexible enough to handle any potential locking scenario, even when + lock requirements change. + It can be optimized so that the parent does not release locks, just tells + the child which locks it doesn't need to obtain. +\end_layout + +\begin_layout Standard +It also keeps the complexity out of the API, and in ctdbd where it is needed. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +tdb_chainlock Functions Expose Implementation +\end_layout + +\begin_layout Standard +tdb_chainlock locks some number of records, including the record indicated + by the given key. + This gave atomicity guarantees; no-one can start a transaction, alter, + read or delete that key while the lock is held. +\end_layout + +\begin_layout Standard +It also makes the same guarantee for any other key in the chain, which is + an internal implementation detail and potentially a cause for deadlock. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + It would be nice to have an explicit single entry lock which affected no + other keys. + Unfortunately, this won't work for an entry which doesn't exist. + Thus while chainlock may be implemented more efficiently for the existing + case, it will still have overlap issues with the non-existing case. + So it is best to keep the current (lack of) guarantee about which records + will be affected to avoid constraining our implementation. +\end_layout + +\begin_layout Subsection +Signal Handling is Not Race-Free +\end_layout + +\begin_layout Standard +The tdb_setalarm_sigptr() call allows the caller's signal handler to indicate + that the tdb locking code should return with a failure, rather than trying + again when a signal is received (and errno == EAGAIN). + This is usually used to implement timeouts. 
+\end_layout + +\begin_layout Standard +Unfortunately, this does not work in the case where the signal is received + before the tdb code enters the fcntl() call to place the lock: the code + will sleep within the fcntl() code, unaware that the signal wants it to + exit. + In the case of long timeouts, this does not happen in practice. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The locking hooks proposed in +\begin_inset CommandInset ref +LatexCommand ref +reference "Proposed-Solution-locking-hook" + +\end_inset + + would allow the user to decide on whether to fail the lock acquisition + on a signal. + This allows the caller to choose their own compromise: they could narrow + the race by checking immediately before the fcntl call. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +It may be possible to make this race-free in some implementations by having + the signal handler alter the struct flock to make it invalid. + This will cause the fcntl() lock call to fail with EINVAL if the signal + occurs before the kernel is entered, otherwise EAGAIN. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +The API Uses Gratuitous Typedefs, Capitals +\end_layout + +\begin_layout Standard +typedefs are useful for providing source compatibility when types can differ + across implementations, or arguably in the case of function pointer definitions + which are hard for humans to parse. + Otherwise it is simply obfuscation and pollutes the namespace. +\end_layout + +\begin_layout Standard +Capitalization is usually reserved for compile-time constants and macros. +\end_layout + +\begin_layout Description +TDB_CONTEXT There is no reason to use this over 'struct tdb_context'; the + definition isn't visible to the API user anyway. 
+\end_layout + +\begin_layout Description +TDB_DATA There is no reason to use this over struct TDB_DATA; the struct + needs to be understood by the API user. +\end_layout + +\begin_layout Description +struct +\begin_inset space ~ +\end_inset + +TDB_DATA This would normally be called 'struct tdb_data'. +\end_layout + +\begin_layout Description +enum +\begin_inset space ~ +\end_inset + +TDB_ERROR Similarly, this would normally be enum tdb_error. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + Introducing lower case variants would please pedants like myself, but if + it were done the existing ones should be kept. + There is little point forcing a purely cosmetic change upon tdb users. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "tdb_log_func-Doesnt-Take" + +\end_inset + +tdb_log_func Doesn't Take The Private Pointer +\end_layout + +\begin_layout Standard +For API compatibility reasons, the logging function needs to call tdb_get_loggin +g_private() to retrieve the pointer registered by the tdb_open_ex for logging. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +It should simply take an extra argument, since we are prepared to break + the API/ABI. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Various Callback Functions Are Not Typesafe +\end_layout + +\begin_layout Standard +The callback functions in tdb_set_logging_function (after +\begin_inset CommandInset ref +LatexCommand ref +reference "tdb_log_func-Doesnt-Take" + +\end_inset + + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read and tdb_check + all take void * and must internally convert it to the argument type they + were expecting. 
+\end_layout + +\begin_layout Standard +If this type changes, the compiler will not produce warnings on the callers, + since it only sees void *. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +With careful use of macros, we can create callback functions which give + a warning when used on gcc and the types of the callback and its private + argument differ. + Unsupported compilers will not give a warning, which is no worse than now. + In addition, the callbacks become clearer, as they need not use void * + for their parameter. +\end_layout + +\begin_layout Standard +See CCAN's typesafe_cb module at http://ccan.ozlabs.org/info/typesafe_cb.html +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, tdb_reopen_all Problematic +\end_layout + +\begin_layout Standard +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB file should + be cleared if the caller discovers it is the only process with the TDB + open. + However, if any caller does not specify TDB_CLEAR_IF_FIRST it will not + be detected, so will have the TDB erased underneath them (usually resulting + in a crash). +\end_layout + +\begin_layout Standard +There is a similar issue on fork(); if the parent exits (or otherwise closes + the tdb) before the child calls tdb_reopen_all() to establish the lock + used to indicate the TDB is opened by someone, a TDB_CLEAR_IF_FIRST opener + at that moment will believe it alone has opened the TDB and will erase + it. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove TDB_CLEAR_IF_FIRST. + Other workarounds are possible, but see +\begin_inset CommandInset ref +LatexCommand ref +reference "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +. 
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard + +\change_deleted 0 1298979699 +Incomplete, TDB_CLEAR_IF_FIRST still defined, but not implemented. +\change_inserted 0 1298979700 +Complete. +\change_unchanged + +\end_layout + +\begin_layout Subsection +Extending The Header Is Difficult +\end_layout + +\begin_layout Standard +We have reserved (zeroed) words in the TDB header, which can be used for + future features. + If the future features are compulsory, the version number must be updated + to prevent old code from accessing the database. + But if the future feature is optional, we have no way of telling if older + code is accessing the database or not. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The header should contain a +\begin_inset Quotes eld +\end_inset + +format variant +\begin_inset Quotes erd +\end_inset + + value (64-bit). + This is divided into two 32-bit parts: +\end_layout + +\begin_layout Enumerate +The lower part reflects the format variant understood by code accessing + the database. +\end_layout + +\begin_layout Enumerate +The upper part reflects the format variant you must understand to write + to the database (otherwise you can only open for reading). +\end_layout + +\begin_layout Standard +The latter field can only be written at creation time, the former should + be written under the OPEN_LOCK when opening the database for writing, if + the variant of the code is lower than the current lowest variant. +\end_layout + +\begin_layout Standard +This should allow backwards-compatible features to be added, and detection + if older code (which doesn't understand the feature) writes to the database. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. 
+\end_layout + +\begin_layout Subsection +Record Headers Are Not Expandable +\end_layout + +\begin_layout Standard +If we later want to add (say) checksums on keys and data, it would require + another format change, which we'd like to avoid. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We often have extra padding at the tail of a record. + If we ensure that the first byte (if any) of this padding is zero, we will + have a way for future changes to detect code which doesn't understand a + new format: the new code would write (say) a 1 at the tail, and thus if + there is no tail or the first byte is 0, we would know the extension is + not present on that record. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Incomplete. +\end_layout + +\begin_layout Subsection +TDB Does Not Use Talloc +\end_layout + +\begin_layout Standard +Many users of TDB (particularly Samba) use the talloc allocator, and thus + have to wrap TDB in a talloc context to use it conveniently. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The allocation within TDB is not complicated enough to justify the use of + talloc, and I am reluctant to force another (excellent) library on TDB + users. + Nonetheless a compromise is possible. + An attribute (see +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) can be added later to tdb_open() to provide an alternate allocation mechanism, + specifically for talloc but usable by any other allocator (which would + ignore the +\begin_inset Quotes eld +\end_inset + +context +\begin_inset Quotes erd +\end_inset + + argument). +\end_layout + +\begin_layout Standard +This would form a talloc hierarchy as expected, but the caller would still + have to attach a destructor to the tdb context returned from tdb_open to + close it.
+ All TDB_DATA fields would be children of the tdb_context, and the caller + would still have to manage them (using talloc_free() or talloc_steal()). +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Section +Performance And Scalability Issues +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB_CLEAR_IF_FIRST-Imposes-Performance" + +\end_inset + +TDB_CLEAR_IF_FIRST Imposes Performance Penalty +\end_layout + +\begin_layout Standard +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is placed at offset + 4 (aka. + the ACTIVE_LOCK). + While these locks never conflict in normal tdb usage, they do add substantial + overhead for most fcntl lock implementations when the kernel scans to detect + if a lock conflict exists. + This is often a single linked list, making the time to acquire and release + a fcntl lock O(N) where N is the number of processes with the TDB open, + not the number actually doing work. +\end_layout + +\begin_layout Standard +In a Samba server it is common to have huge numbers of clients sitting idle, + and thus they have weaned themselves off the TDB_CLEAR_IF_FIRST flag. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +There is a flag to tdb_reopen_all() which is used for this optimization: + if the parent process will outlive the child, the child does not need the + ACTIVE_LOCK. + This is a workaround for this very performance issue. +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove the flag. + It was a neat idea, but even trivial servers tend to know when they are + initializing for the first time and can simply unlink the old tdb at that + point. 
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard + +\change_deleted 0 1298979837 +Incomplete; TDB_CLEAR_IF_FIRST still defined, but does nothing. +\change_inserted 0 1298979837 +Complete. +\change_unchanged + +\end_layout + +\begin_layout Subsection +TDB Files Have a 4G Limit +\end_layout + +\begin_layout Standard +This seems to be becoming an issue (so much for +\begin_inset Quotes eld +\end_inset + +trivial +\begin_inset Quotes erd +\end_inset + +!), particularly for ldb. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +A new, incompatible TDB format which uses 64 bit offsets internally rather + than 32 bit as now. + For simplicity of endian conversion (which TDB does on the fly if required), + all values will be 64 bit on disk. + In practice, some upper bits may be used for other purposes, but at least + 56 bits will be available for file offsets. +\end_layout + +\begin_layout Standard +tdb_open() will automatically detect the old version, and even create them + if TDB_VERSION6 is specified to tdb_open. +\end_layout + +\begin_layout Standard +32 bit processes will still be able to access TDBs larger than 4G (assuming + that their off_t allows them to seek to 64 bits), they will gracefully + fall back as they fail to mmap. + This can happen already with large TDBs. +\end_layout + +\begin_layout Standard +Old versions of tdb will fail to open the new TDB files (since 28 August + 2009, commit 398d0c29290: prior to that any unrecognized file format would + be erased and initialized as a fresh tdb!) +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +TDB Records Have a 4G Limit +\end_layout + +\begin_layout Standard +This has not been a reported problem, and the API uses size_t which can + be 64 bit on 64 bit platforms. + However, other limits may have made such an issue moot. 
+\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Record sizes will be 64 bit, with an error returned on 32 bit platforms + which try to access such records (the current implementation would return + TDB_ERR_OOM in a similar case). + It seems unlikely that 32 bit keys will be a limitation, so the implementation + may not support this (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +). +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Hash Size Is Determined At TDB Creation Time +\end_layout + +\begin_layout Standard +TDB contains a number of hash chains in the header; the number is specified + at creation time, and defaults to 131. + This is such a bottleneck on large databases (as each hash chain gets quite + long), that LDB uses 10,000 for this hash. + In general it is impossible to know what the 'right' answer is at database + creation time. +\end_layout + +\begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Hash-Size-Solution" + +\end_inset + +Proposed Solution +\end_layout + +\begin_layout Standard +After comprehensive performance testing on various scalable hash variants +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 This was annoying + because I was previously convinced that an expanding tree of hashes would + be very close to optimal. +\end_layout + +\end_inset + +, it became clear that it is hard to beat a straight linear hash table which + doubles in size when it reaches saturation. + Unfortunately, altering the hash table introduces serious locking complications +: the entire hash table needs to be locked to enlarge the hash table, and + others might be holding locks. + Particularly insidious are insertions done under tdb_chainlock. 
+\end_layout + +\begin_layout Standard +Thus an expanding layered hash will be used: an array of hash groups, with + each hash group exploding into pointers to lower hash groups once it fills, + turning into a hash tree. + This has implications for locking: we must lock the entire group in case + we need to expand it, yet we don't know how deep the tree is at that point. +\end_layout + +\begin_layout Standard +Note that bits from the hash table entries should be stolen to hold more + hash bits to reduce the penalty of collisions. + We can use the otherwise-unused lower 3 bits. + If we limit the size of the database to 64 exabytes, we can use the top + 8 bits of the hash entry as well. + These 11 bits would reduce false positives down to 1 in 2000 which is more + than we need: we can use one of the bits to indicate that the extra hash + bits are valid. + This means we can choose not to re-hash all entries when we expand a hash + group; simply use the next bits we need and mark them invalid. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "TDB-Freelist-Is" + +\end_inset + +TDB Freelist Is Highly Contended +\end_layout + +\begin_layout Standard +TDB uses a single linked list for the free list. + Allocation occurs as follows, using heuristics which have evolved over + time: +\end_layout + +\begin_layout Enumerate +Get the free list lock for this whole operation. +\end_layout + +\begin_layout Enumerate +Multiply length by 1.25, so we always over-allocate by 25%. +\end_layout + +\begin_layout Enumerate +Set the slack multiplier to 1. +\end_layout + +\begin_layout Enumerate +Examine the current freelist entry: if it is > length but < the current + best case, remember it as the best case. +\end_layout + +\begin_layout Enumerate +Multiply the slack multiplier by 1.05. 
+\end_layout + +\begin_layout Enumerate +If our best fit so far is less than length * slack multiplier, return it. + The slack will be turned into a new free record if it's large enough. +\end_layout + +\begin_layout Enumerate +Otherwise, go onto the next freelist entry. +\end_layout + +\begin_layout Standard +Deleting a record occurs as follows: +\end_layout + +\begin_layout Enumerate +Lock the hash chain for this whole operation. +\end_layout + +\begin_layout Enumerate +Walk the chain to find the record, keeping the prev pointer offset. +\end_layout + +\begin_layout Enumerate +If max_dead is non-zero: +\end_layout + +\begin_deeper +\begin_layout Enumerate +Walk the hash chain again and count the dead records. +\end_layout + +\begin_layout Enumerate +If it's more than max_dead, bulk free all the dead ones (similar to steps + 4 and below, but the lock is only obtained once). +\end_layout + +\begin_layout Enumerate +Simply mark this record as dead and return. + +\end_layout + +\end_deeper +\begin_layout Enumerate +Get the free list lock for the remainder of this operation. +\end_layout + +\begin_layout Enumerate +\begin_inset CommandInset label +LatexCommand label +name "right-merging" + +\end_inset + +Examine the following block to see if it is free; if so, enlarge the current + block and remove that block from the free list. + This was disabled, as removal from the free list was O(entries-in-free-list). +\end_layout + +\begin_layout Enumerate +Examine the preceding block to see if it is free: for this reason, each + block has a 32-bit tailer which indicates its length. + If it is free, expand it to cover our new block and return. +\end_layout + +\begin_layout Enumerate +Otherwise, prepend ourselves to the free list.
+\end_layout + +\begin_layout Standard +Disabling right-merging (step +\begin_inset CommandInset ref +LatexCommand ref +reference "right-merging" + +\end_inset + +) causes fragmentation; the other heuristics proved insufficient to address + this, so the final answer to this was that when we expand the TDB file + inside a transaction commit, we repack the entire tdb. +\end_layout + +\begin_layout Standard +The single list lock limits our allocation rate; due to the other issues + this is not currently seen as a bottleneck. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +\end_layout + +\begin_layout Standard +The free list must be split to reduce contention. + Assuming perfect free merging, we can at most have 1 free list entry for + each entry. + This implies that the number of free lists is related to the size of the + hash table, but as it is rare to walk a large number of free list entries + we can use far fewer, say 1/32 of the number of hash buckets. +\end_layout + +\begin_layout Standard +It seems tempting to try to reuse the hash implementation which we use for + records here, but we have two ways of searching for free entries: for allocatio +n we search by size (and possibly zone) which produces too many clashes + for our hash table to handle well, and for coalescing we search by address. + Thus an array of doubly-linked free lists seems preferable. +\end_layout + +\begin_layout Standard +There are various benefits in using per-size free lists (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +) but it's not clear this would reduce contention in the common case where + all processes are allocating/freeing the same size. 
+ Thus we almost certainly need to divide in other ways: the most obvious + is to divide the file into zones, and use a free list (or table of free + lists) for each. + This approximates address ordering. +\end_layout + +\begin_layout Standard +Unfortunately it is difficult to know what heuristics should be used to + determine zone sizes, and our transaction code relies on being able to + create a +\begin_inset Quotes eld +\end_inset + +recovery area +\begin_inset Quotes erd +\end_inset + + by simply appending to the file (difficult if it would need to create a + new zone header). + Thus we use a linked-list of free tables; currently we only ever create + one, but if there is more than one we choose one at random to use. + In future we may use heuristics to add new free tables on contention. + We only expand the file when all free tables are exhausted. +\end_layout + +\begin_layout Standard +The basic algorithm is as follows. + Freeing is simple: +\end_layout + +\begin_layout Enumerate +Identify the correct free list. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +Re-check the list (we didn't have a lock, sizes could have changed): relock + if necessary. +\end_layout + +\begin_layout Enumerate +Place the freed entry in the list. +\end_layout + +\begin_layout Standard +Allocation is a little more complicated, as we perform delayed coalescing + at this point: +\end_layout + +\begin_layout Enumerate +Pick a free table; usually the previous one. +\end_layout + +\begin_layout Enumerate +Lock the corresponding list. +\end_layout + +\begin_layout Enumerate +If the top entry is large enough, remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, coalesce entries in the list. If there was no entry large enough, + unlock the list and try the next largest list. +\end_layout + +\begin_layout Enumerate +If no list has an entry which meets our needs, try the next free table.
+\end_layout + +\begin_layout Enumerate +If no zone satisfies, expand the file. +\end_layout + +\begin_layout Standard +This optimizes rapid insert/delete of free list entries by not coalescing + them all the time. + First-fit address ordering seems to be fairly good for keeping + fragmentation low (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Becomes-Fragmented" + +\end_inset + +). + Note that address ordering does not need a tailer to coalesce, though if + we needed one we could have one cheaply: see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Records-Incur-A" + +\end_inset + +. + +\end_layout + +\begin_layout Standard +Each free entry has the free table number in the header: less than 255. + It also contains a doubly-linked list for easy deletion. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Becomes-Fragmented" + +\end_inset + +TDB Becomes Fragmented +\end_layout + +\begin_layout Standard +Much of this is a result of allocation strategy +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 ftp://ftp.cs.ute +xas.edu/pub/garbage/malloc/ismm98.ps +\end_layout + +\end_inset + + and deliberate hobbling of coalescing; internal fragmentation (aka overallocati +on) is deliberately set at 25%, and external fragmentation is only cured + by the decision to repack the entire db when a transaction commit needs + to enlarge the file. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The 25% overhead on allocation works in practice for ldb because indexes + tend to expand by one record at a time.
+ This internal fragmentation can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them. +\end_layout + +\begin_layout Standard +There is a spectrum of possible solutions for external fragmentation: + one is to use a fragmentation-avoiding allocation strategy such as best-fit + address-order allocator. + The other end of the spectrum would be to use a bump allocator (very fast + and simple) and simply repack the file when we reach the end. +\end_layout + +\begin_layout Standard +There are three problems with efficient fragmentation-avoiding allocators: + they are non-trivial, they tend to use a single free list for each size, + and there's no evidence that tdb allocation patterns will match those recorded + for general allocators (though it seems likely). +\end_layout + +\begin_layout Standard +Thus we don't spend too much effort on external fragmentation; we will be + no worse than the current code if we need to repack on occasion. + More effort is spent on reducing freelist contention, and reducing overhead.
+\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:Records-Incur-A" + +\end_inset + +Records Incur A 28-Byte Overhead +\end_layout + +\begin_layout Standard +Each TDB record has a header as follows: +\end_layout + +\begin_layout LyX-Code +struct tdb_record { +\end_layout + +\begin_layout LyX-Code + tdb_off_t next; /* offset of the next record in the list */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t rec_len; /* total byte length of record */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t key_len; /* byte length of key */ +\end_layout + +\begin_layout LyX-Code + tdb_len_t data_len; /* byte length of data */ +\end_layout + +\begin_layout LyX-Code + uint32_t full_hash; /* the full 32 bit hash of the key */ +\end_layout + +\begin_layout LyX-Code + uint32_t magic; /* try to catch errors */ +\end_layout + +\begin_layout LyX-Code + /* the following union is implied: +\end_layout + +\begin_layout LyX-Code + union { +\end_layout + +\begin_layout LyX-Code + char record[rec_len]; +\end_layout + +\begin_layout LyX-Code + struct { +\end_layout + +\begin_layout LyX-Code + char key[key_len]; +\end_layout + +\begin_layout LyX-Code + char data[data_len]; +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + uint32_t totalsize; (tailer) +\end_layout + +\begin_layout LyX-Code + } +\end_layout + +\begin_layout LyX-Code + */ +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +Naively, this would double to a 56-byte overhead on a 64 bit implementation. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +We can use various techniques to reduce this for an allocated block: +\end_layout + +\begin_layout Enumerate +The 'next' pointer is not required, as we are using a flat hash table. 
+\end_layout + +\begin_layout Enumerate +'rec_len' can instead be expressed as an addition to key_len and data_len + (it accounts for wasted or overallocated length in the record). + Since the record length is always a multiple of 8, we can conveniently + fit it in 32 bits (representing up to 35 bits). +\end_layout + +\begin_layout Enumerate +'key_len' and 'data_len' can be reduced. + I'm unwilling to restrict 'data_len' to 32 bits, but instead we can combine + the two into one 64-bit field and use a 5 bit value which indicates at + what bit to divide the two. + Keys are unlikely to scale as fast as data, so I'm assuming a maximum key + size of 32 bits. +\end_layout + +\begin_layout Enumerate +'full_hash' is used to avoid a memcmp on the +\begin_inset Quotes eld +\end_inset + +miss +\begin_inset Quotes erd +\end_inset + + case, but this is diminishing returns after a handful of bits (at 10 bits, + it reduces 99.9% of false memcmp). + As an aside, as the lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. + Note that it's not clear that these bits will be a win, given the extra + bits in the hash table itself (see +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:Hash-Size-Solution" + +\end_inset + +). +\end_layout + +\begin_layout Enumerate +'magic' does not need to be enlarged: it currently reflects one of 5 values + (used, free, dead, recovery, and unused_recovery). + It is useful for quick sanity checking however, and should not be eliminated. +\end_layout + +\begin_layout Enumerate +'tailer' is only used to coalesce free blocks (so a block to the right can + find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of the following + block (and the tailer only exists in free blocks). +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +This technique from Thomas Standish. + Data Structure Techniques.
+ Addison-Wesley, Reading, Massachusetts, 1980. +\end_layout + +\end_inset + + The current proposed coalescing algorithm doesn't need this, however. +\end_layout + +\begin_layout Standard +This produces a 16 byte used header like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_used_record { +\end_layout + +\begin_layout LyX-Code + uint32_t used_magic : 16, +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + key_data_divide: 5, +\end_layout + +\begin_layout LyX-Code + top_hash: 11; +\end_layout + +\begin_layout LyX-Code + uint32_t extra_octets; +\end_layout + +\begin_layout LyX-Code + uint64_t key_and_data_len; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard +And a free record like this: +\end_layout + +\begin_layout LyX-Code +struct tdb_free_record { +\end_layout + +\begin_layout LyX-Code + uint64_t free_magic: 8, +\end_layout + +\begin_layout LyX-Code + prev : 56; +\end_layout + +\begin_layout LyX-Code + +\end_layout + +\begin_layout LyX-Code + uint64_t free_table: 8, +\end_layout + +\begin_layout LyX-Code + total_length : 56; +\end_layout + +\begin_layout LyX-Code + uint64_t next; +\end_layout + +\begin_layout LyX-Code +}; +\end_layout + +\begin_layout Standard + +\change_deleted 0 1291206079 + +\change_unchanged +Note that by limiting valid offsets to 56 bits, we can pack everything we + need into 3 64-bit words, meaning our minimum record size is 8 bytes. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete.
+\end_layout + +\begin_layout Subsection +Transaction Commit Requires 4 fdatasync +\end_layout + +\begin_layout Standard +The current transaction algorithm is: +\end_layout + +\begin_layout Enumerate +write_recovery_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +write_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +overwrite_with_new_data(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Enumerate +remove_recovery_header(); +\end_layout + +\begin_layout Enumerate +sync(); +\end_layout + +\begin_layout Standard +On current ext3, each sync flushes all data to disk, so the next 3 syncs + are relatively expensive. + But this could become a performance bottleneck on other filesystems such + as ext4. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Neil Brown points out that this is overzealous, and only one sync is needed: +\end_layout + +\begin_layout Enumerate +Bundle the recovery data, a transaction counter and a strong checksum of + the new data. +\end_layout + +\begin_layout Enumerate +Strong checksum that whole bundle. +\end_layout + +\begin_layout Enumerate +Store the bundle in the database. +\end_layout + +\begin_layout Enumerate +Overwrite the oldest of the two recovery pointers in the header (identified + using the transaction counter) with the offset of this bundle. +\end_layout + +\begin_layout Enumerate +sync. +\end_layout + +\begin_layout Enumerate +Write the new data to the file. +\end_layout + +\begin_layout Standard +Checking for recovery means identifying the latest bundle with a valid checksum + and using the new data checksum to ensure that it has been applied. + This is more expensive than the current check, but need only be done at + open. 
+ For running databases, a separate header field can be used to indicate + a transaction in progress; we need only check for recovery if this is set. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "sub:TDB-Does-Not" + +\end_inset + +TDB Does Not Have Snapshot Support +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + + (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). +\end_layout + +\begin_layout Standard +But as a thought experiment, if we implemented transactions to only overwrite + free entries (this is tricky: there must not be a header in each entry + which indicates whether it is free; instead, that must be determined by + presence in metadata elsewhere), + and a pointer to the hash table, we could create an entirely new commit + without destroying existing data. + Then it would be easy to implement snapshots in a similar way. +\end_layout + +\begin_layout Standard +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\end_layout + +\begin_layout Standard +We could then implement snapshots using a similar method, using multiple + different hash tables/free tables. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +Transactions Cannot Operate in Parallel +\end_layout + +\begin_layout Standard +This would be useless for ldb, as it hits the index records with just about + every update. 
+ It would add significant complexity in resolving clashes, and cause + all transaction callers to write their code to loop in the case where the + transactions spuriously failed. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None (but see +\begin_inset CommandInset ref +LatexCommand ref +reference "replay-attribute" + +\end_inset + +). + We could solve a small part of the problem by providing read-only transactions. + These would allow one write transaction to begin, but it could not commit + until all r/o transactions are done. + This would require a new RO_TRANSACTION_LOCK, which would be upgraded on + commit. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\begin_layout Subsection +Default Hash Function Is Suboptimal +\end_layout + +\begin_layout Standard +The Knuth-inspired multiplicative hash used by tdb is fairly slow (especially + if we expand it to 64 bits), and works best when the hash bucket size is + a prime number (which also means a slow modulus). + In addition, it is highly predictable which could potentially lead to a + Denial of Service attack in some TDB uses. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +The Jenkins lookup3 hash +\begin_inset Foot +status open + +\begin_layout Plain Layout +http://burtleburtle.net/bob/c/lookup3.c +\end_layout + +\end_inset + + is a fast and superbly-mixing hash. + It's used by the Linux kernel and almost everything else. + This has the particular properties that it takes an initial seed, and produces + two 32 bit hash numbers, which we can combine into a 64-bit hash. +\end_layout + +\begin_layout Standard +The seed should be created at tdb-creation time from some random source, + and placed in the header. + This is far from foolproof, but adds a little bit of protection against + hash bombing. 
+\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "Reliable-Traversal-Adds" + +\end_inset + +Reliable Traversal Adds Complexity +\end_layout + +\begin_layout Standard +We lock a record during traversal iteration, and try to grab that lock in + the delete code. + If that grab on delete fails, we simply mark it deleted and continue onwards; + traversal checks for this condition and does the delete when it moves off + the record. +\end_layout + +\begin_layout Standard +If traversal terminates, the dead record may be left indefinitely. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +Remove reliability guarantees; see +\begin_inset CommandInset ref +LatexCommand ref +reference "traverse-Proposed-Solution" + +\end_inset + +. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Complete. +\end_layout + +\begin_layout Subsection +Fcntl Locking Adds Overhead +\end_layout + +\begin_layout Standard +Placing a fcntl lock means a system call, as does removing one. + This is actually one reason why transactions can be faster (everything + is locked once at transaction start). + In the uncontended case, this overhead can theoretically be eliminated. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +We tried this before with spinlock support, in the early days of TDB, and + it didn't make much difference except in manufactured benchmarks. +\end_layout + +\begin_layout Standard +We could use spinlocks (with futex kernel support under Linux), but it means + that we lose automatic cleanup when a process dies with a lock. + There is a method of auto-cleanup under Linux, but it's not supported by + other operating systems. 
+ We could reintroduce a clear-if-first-style lock and sweep for dead futexes + on open, but that wouldn't help the normal case of one concurrent opener + dying. + Increasingly elaborate repair schemes could be considered, but they require + an ABI change (everyone must use them) anyway, so there's no need to do + this at the same time as everything else. +\end_layout + +\begin_layout Subsection +Some Transactions Don't Require Durability +\end_layout + +\begin_layout Standard +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for normal (fast) + usage, and occasionally empties the results into a transactional TDB. + This kind of usage prioritizes performance over durability: as long as + we are consistent, data can be lost. +\end_layout + +\begin_layout Standard +This would be more neatly implemented inside tdb: a +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + transaction commit (i.e. + syncless) which means that data may be reverted on a crash. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\end_layout + +\begin_layout Standard +None. +\end_layout + +\begin_layout Standard +Unfortunately any transaction scheme which overwrites old data requires + a sync before that overwrite to avoid the possibility of corruption. +\end_layout + +\begin_layout Standard +It seems possible to use a scheme similar to that described in +\begin_inset CommandInset ref +LatexCommand ref +reference "sub:TDB-Does-Not" + +\end_inset + +, where transactions are committed without overwriting existing data, and + an array of top-level pointers is available in the header. + If the transaction is +\begin_inset Quotes eld +\end_inset + +soft +\begin_inset Quotes erd +\end_inset + + then we would not need a sync at all: existing processes would pick up + the new hash table and free list and work with that. 
+\end_layout + +\begin_layout Standard +At some later point, a sync would allow recovery of the old data into the + free lists (perhaps when the array of top-level pointers filled). + On crash, tdb_open() would examine the array of top levels, and apply the + transactions until it encountered an invalid checksum. +\end_layout + +\begin_layout Subsection +Tracing Is Fragile, Replay Is External +\end_layout + +\begin_layout Standard +The current TDB has compile-time-enabled tracing code, but it often breaks + as it is not enabled by default. + In a similar way, the ctdb code has an external wrapper which does replay + tracing so it can coordinate cluster-wide transactions. +\end_layout + +\begin_layout Subsubsection +Proposed Solution +\begin_inset CommandInset label +LatexCommand label +name "replay-attribute" + +\end_inset + + +\end_layout + +\begin_layout Standard +Tridge points out that an attribute can be later added to tdb_open (see + +\begin_inset CommandInset ref +LatexCommand ref +reference "attributes" + +\end_inset + +) to provide replay/trace hooks, which could become the basis for this and + future parallel transactions and snapshot support. +\end_layout + +\begin_layout Subsubsection +Status +\end_layout + +\begin_layout Standard +Deferred. +\end_layout + +\end_body +\end_document +@ + + +1.12 +log +@Add status, some fixes, linked freelists. 
+@ +text +@d53 1 +a53 7 + +\change_deleted 0 1291204535 +14-September +\change_inserted 0 1291204533 +1-December +\change_unchanged +-2010 +a580 2 +\change_inserted 0 1291204563 + +a583 2 + +\change_inserted 0 1291204572 +a587 2 + +\change_inserted 0 1291204573 +a588 2 +\change_unchanged + +a629 2 +\change_inserted 0 1291204588 + +a632 2 + +\change_inserted 0 1291204588 +a636 2 + +\change_inserted 0 1291204631 +a639 2 +\change_unchanged + +a693 2 +\change_inserted 0 1291204639 + +a696 2 + +\change_inserted 0 1291204640 +d702 1 +a702 1 +\change_inserted 0 1291204665 +d704 2 +a728 2 +\change_inserted 0 1291204671 + +a731 2 + +\change_inserted 0 1291204671 +a735 2 + +\change_inserted 0 1291204673 +a736 2 +\change_unchanged + +a780 2 +\change_inserted 0 1291204731 + +a783 2 + +\change_inserted 0 1291204732 +a787 2 + +\change_inserted 0 1291204779 +a790 2 +\change_unchanged + +a842 2 +\change_inserted 0 1291204830 + +a845 2 + +\change_inserted 0 1291204831 +a849 2 + +\change_inserted 0 1291204834 +a850 2 +\change_unchanged + +d879 9 +a887 2 + deal of churn; we are better to guarantee that the tdb_errcode is per-thread + so the current programming model can be maintained. +d891 9 +d903 2 +a922 2 +\change_inserted 0 1291204847 + +a925 2 + +\change_inserted 0 1291204847 +d930 5 +a934 3 + +\change_inserted 0 1291204852 +Incomplete. 
+a1051 2 +\change_inserted 0 1291204881 + +a1054 2 + +\change_inserted 0 1291204881 +a1058 2 + +\change_inserted 0 1291204885 +a1059 2 +\change_unchanged + +a1140 2 +\change_inserted 0 1291204898 + +a1143 2 + +\change_inserted 0 1291204898 +a1147 2 + +\change_inserted 0 1291204901 +a1148 2 +\change_unchanged + +a1224 2 +\change_inserted 0 1291204908 + +a1227 2 + +\change_inserted 0 1291204908 +a1231 2 + +\change_inserted 0 1291204908 +a1232 2 +\change_unchanged + +a1271 2 +\change_inserted 0 1291204917 + +a1274 2 + +\change_inserted 0 1291204917 +a1278 2 + +\change_inserted 0 1291204920 +a1279 2 +\change_unchanged + +a1316 2 +\change_inserted 0 1291204927 + +a1319 2 + +\change_inserted 0 1291204928 +d1325 1 +a1325 1 +\change_inserted 0 1291204942 +d1327 2 +a1381 2 +\change_inserted 0 1291205003 + +a1384 2 + +\change_inserted 0 1291205004 +a1388 2 + +\change_inserted 0 1291205007 +a1411 2 +\change_inserted 0 1291205019 + +a1414 2 + +\change_inserted 0 1291205019 +a1418 2 + +\change_inserted 0 1291205023 +a1419 2 +\change_unchanged + +a1465 2 +\change_inserted 0 1291205029 + +a1468 2 + +\change_inserted 0 1291205029 +a1472 2 + +\change_inserted 0 1291206020 +a1473 2 +\change_unchanged + +a1528 2 +\change_inserted 0 1291205043 + +a1531 2 + +\change_inserted 0 1291205043 +d1537 1 +a1537 1 +\change_inserted 0 1291205057 +d1539 2 +a1589 2 +\change_inserted 0 1291205062 + +a1592 2 + +\change_inserted 0 1291205062 +a1596 2 + +\change_inserted 0 1291205062 +a1597 2 +\change_unchanged + +a1626 2 +\change_inserted 0 1291205072 + +a1629 2 + +\change_inserted 0 1291205073 +a1633 2 + +\change_inserted 0 1291205073 +a1634 2 +\change_unchanged + +a1674 4 + +\change_deleted 0 1291204504 + +\change_unchanged +a1699 2 +\change_inserted 0 1291205079 + +a1702 2 + +\change_inserted 0 1291205080 +a1706 2 + +\change_inserted 0 1291205080 +a1707 2 +\change_unchanged + +a1833 2 +\change_inserted 0 1291205090 + +d1869 2 +a1870 7 + is to divide the file into zones, and using a free list (or 
+\change_inserted 0 1291205498 +table +\change_deleted 0 1291205497 +set +\change_unchanged + of free lists) for each. +a1871 2 +\change_inserted 0 1291205203 + +a1874 2 + +\change_inserted 0 1291205358 +a1890 21 +\change_unchanged + +\end_layout + +\begin_layout Standard + +\change_deleted 0 1291205198 +Note that this means we need to split the free lists when we expand the + file; this is probably acceptable when we double the hash table size, since + that is such an expensive operation already. + In the case of increasing the file size, there is an optimization we can + use: if we use M in the formula above as the file size rounded up to the + next power of 2, we only need reshuffle free lists when the file size crosses + a power of 2 boundary, +\emph on +and +\emph default +reshuffling the free lists is trivial: we simply merge every consecutive + pair of free lists. +\change_unchanged + +d1899 1 +a1899 7 +Identify the correct +\change_inserted 0 1291205366 +free list +\change_deleted 0 1291205364 +zone +\change_unchanged +. +d1907 2 +a1908 7 +Re-check the +\change_inserted 0 1291205372 +list +\change_deleted 0 1291205371 +zone +\change_unchanged + (we didn't have a lock, sizes could have changed): relock if necessary. +d1912 1 +a1912 5 +Place the freed entry in the list +\change_deleted 0 1291205382 + for that zone +\change_unchanged +. +d1921 1 +a1921 15 +Pick a +\change_deleted 0 1291205403 +zone either the zone we last freed into, or based on a +\begin_inset Quotes eld +\end_inset + +random +\begin_inset Quotes erd +\end_inset + + number. +\change_inserted 0 1291205411 +free table; usually the previous one. +\change_unchanged + +a1925 10 +\change_deleted 0 1291205432 + +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1291205428 +Re-check the zone: relock if necessary. +\change_unchanged + +d1934 1 +a1934 7 + unlock the list and try the next +\change_inserted 0 1291205455 +largest list +\change_deleted 0 1291205452 +zone. 
+\change_inserted 0 1291205457 + +a1937 2 + +\change_inserted 0 1291205476 +a1938 2 +\change_unchanged + +a1966 2 +\change_inserted 0 1291205542 + +a1969 2 + +\change_inserted 0 1291205591 +a1971 70 +\change_unchanged + +\end_layout + +\begin_layout Standard + +\change_deleted 0 1291205539 +I anticipate that the number of entries in each free zone would be small, + but it might be worth using one free entry to hold pointers to the others + for cache efficiency. +\change_unchanged + +\end_layout + +\begin_layout Standard + +\change_deleted 0 1291205534 +\begin_inset CommandInset label +LatexCommand label +name "freelist-in-zone" + +\end_inset + +If we want to avoid locking complexity (enlarging the free lists when we + enlarge the file) we could place the array of free lists at the beginning + of each zone. + This means existing array lists never move, but means that a record cannot + be larger than a zone. + That in turn implies that zones should be variable sized (say, power of + 2), which makes the question +\begin_inset Quotes eld +\end_inset + +what zone is this record in? +\begin_inset Quotes erd +\end_inset + + much harder (and +\begin_inset Quotes eld +\end_inset + +pick a random zone +\begin_inset Quotes erd +\end_inset + +, but that's less common). + It could be done with as few as 4 bits from the record header. +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout +Using +\begin_inset Formula $2^{16+N*3}$ +\end_inset + +means 0 gives a minimal 65536-byte zone, 15 gives the maximal +\begin_inset Formula $2^{61}$ +\end_inset + + byte zone. + Zones range in factor of 8 steps. + Given the zone size for the zone the current record is in, we can determine + the start of the zone. 
+\end_layout + +\end_inset + + +\change_inserted 0 1291205139 + +d2218 1 +a2218 5 + uint32_t +\change_inserted 0 1291205758 +used_ +\change_unchanged +magic : 16, +a2222 4 +\change_deleted 0 1291205693 + prev_is_free: 1, +\change_unchanged + +d2230 1 +a2230 7 + top_hash: 1 +\change_inserted 0 1291205704 +1 +\change_deleted 0 1291205704 +0 +\change_unchanged +; +d2254 1 +a2254 9 + uint +\change_inserted 0 1291205725 +64 +\change_deleted 0 1291205723 +32 +\change_unchanged +_t +\change_inserted 0 1291205753 +free_magic: 8, +a2257 2 + +\change_inserted 0 1291205746 +a2262 24 +\change_deleted 0 1291205749 +free_magic; +\change_unchanged + +\end_layout + +\begin_layout LyX-Code + uint64_t +\change_inserted 0 1291205786 +free_table: 8, +\end_layout + +\begin_layout LyX-Code + +\change_inserted 0 1291205788 + +\change_unchanged +total_length +\change_inserted 0 1291205792 + : 56 +\change_deleted 0 1291205790 +; +\change_unchanged + +d2266 1 +a2266 7 + uint64_t +\change_deleted 0 1291205801 +prev, +\change_unchanged +next; +\change_deleted 0 1291205811 + +d2270 1 +a2270 3 + +\change_deleted 0 1291205811 + ... +d2274 1 +a2274 5 + +\change_deleted 0 1291205808 + uint64_t tailer +\change_unchanged +; +d2283 5 +a2287 16 +\change_deleted 0 1291205827 +We might want to take some bits from the used record's top_hash (and the + free record which has 32 bits of padding to spare anyway) if we use variable + sized zones. + See +\begin_inset CommandInset ref +LatexCommand ref +reference "freelist-in-zone" + +\end_inset + +. + +\change_inserted 0 1291205885 + Note that by limiting valid offsets to 56 bits, we can pack everything + we need into 3 64-byte words, meaning our minimum record size is 8 bytes. 
+a2290 2 + +\change_inserted 0 1291205886 +a2294 2 + +\change_inserted 0 1291205886 +a2295 2 +\change_unchanged + +a2385 2 +\change_inserted 0 1291205894 + +a2388 2 + +\change_inserted 0 1291205894 +a2392 2 + +\change_inserted 0 1291205902 +a2393 2 +\change_unchanged + +a2415 4 + +\change_deleted 0 1291204504 + +\change_unchanged +a2445 2 +\change_inserted 0 1291205910 + +a2448 2 + +\change_inserted 0 1291205910 +a2452 2 + +\change_inserted 0 1291205914 +a2453 2 +\change_unchanged + +a2485 2 +\change_inserted 0 1291205919 + +a2488 2 + +\change_inserted 0 1291205919 +a2492 2 + +\change_inserted 0 1291205922 +a2493 2 +\change_unchanged + +a2533 2 +\change_inserted 0 1291205929 + +a2536 2 + +\change_inserted 0 1291205929 +a2540 2 + +\change_inserted 0 1291205929 +a2541 2 +\change_unchanged + +a2578 2 +\change_inserted 0 1291205932 + +a2581 2 + +\change_inserted 0 1291205933 +a2585 2 + +\change_inserted 0 1291205933 +a2586 2 +\change_unchanged + +a2724 2 +\change_inserted 0 1291205944 + +a2727 2 + +\change_inserted 0 1291205945 +a2731 2 + +\change_inserted 0 1291205948 +a2732 2 +\change_unchanged + +@ + + +1.11 +log +@Merge changes +@ +text +@d53 7 +a59 1 +14-September-2010 +d587 16 +d644 18 +d716 16 +d753 16 +d813 18 +d883 16 +d953 16 +d1084 16 +d1181 16 +d1273 16 +d1328 16 +d1381 16 +d1447 19 +a1465 2 + if older code (which doesn't understand the feature) writes to the database.Reco +rd Headers Are Not Expandible +d1484 16 +d1546 16 +d1617 16 +d1680 16 +d1725 16 +d1810 16 +d1951 8 +a1958 3 +Proposed SolutionThe first step is to remove all the current heuristics, + as they obviously interact, then examine them once the lock contention + is addressed. +d1989 7 +a1995 2 + is to divide the file into zones, and using a free list (or set of free + lists) for each. +d1997 2 +d2002 25 +d2039 2 +d2049 7 +a2055 1 +Identify the correct zone. +d2063 7 +a2069 2 +Re-check the zone (we didn't have a lock, sizes could have changed): relock + if necessary. 
+d2073 5 +a2077 1 +Place the freed entry in the list for that zone. +d2086 3 +a2088 1 +Pick a zone either the zone we last freed into, or based on a +d2097 4 +d2105 2 +d2110 2 +d2113 2 +d2123 15 +a2137 1 + unlock the list and try the next zone. +d2166 11 +d2180 2 +d2185 2 +d2190 2 +d2223 1 +a2223 1 +status open +d2243 2 +d2491 5 +a2495 1 + uint32_t magic : 16, +d2499 2 +d2502 2 +d2511 7 +a2517 1 + top_hash: 10; +d2541 29 +a2569 1 + uint32_t free_magic; +d2573 11 +a2583 1 + uint64_t total_length; +d2587 7 +a2593 1 + uint64_t prev, next; +d2597 2 +d2603 5 +a2607 1 + uint64_t tailer; +d2615 2 +d2628 18 +d2736 16 +d2808 16 +d2856 16 +d2912 16 +d2965 16 +d3119 16 +@ + + +1.10 +log +@Tracing attribute, talloc support. +@ +text +@d1 1 +a1 1 +#LyX 1.6.5 created this file. For more info see http://www.lyx.org/ +d53 1 +a53 7 + +\change_deleted 0 1283307542 +26-July +\change_inserted 0 1284423485 +14-September +\change_unchanged +-2010 +a472 2 +\change_inserted 0 1284422789 + +a479 2 +\change_unchanged + +a838 2 + +\change_inserted 0 1284016998 +a846 2 +\change_unchanged + +a1194 2 +\change_inserted 0 1284015637 + +a1197 2 + +\change_inserted 0 1284015716 +a1201 2 + +\change_inserted 0 1284015906 +a1210 2 + +\change_inserted 0 1284015637 +a1214 2 + +\change_inserted 0 1284016114 +a1227 2 + +\change_inserted 0 1284016149 +a1232 2 + +\change_inserted 0 1284016639 +a1237 2 + +\change_inserted 0 1284016821 +a1243 2 + +\change_inserted 0 1284016803 +d1245 2 +a1246 9 + if older code (which doesn't understand the feature) writes to the database. 
+\change_deleted 0 1284016101 + +\end_layout + +\begin_layout Subsection + +\change_inserted 0 1284015634 +Record Headers Are Not Expandible +a1249 2 + +\change_inserted 0 1284015634 +a1254 2 + +\change_inserted 0 1284015634 +a1258 2 + +\change_inserted 0 1284422552 +a1267 2 + +\change_inserted 0 1284422568 +a1271 2 + +\change_inserted 0 1284422646 +a1276 2 + +\change_inserted 0 1284422656 +a1280 2 + +\change_inserted 0 1284423065 +a1305 2 + +\change_inserted 0 1284423042 +a1310 2 +\change_unchanged + +a1457 2 + +\change_inserted 0 1283336713 +a1463 2 + +\change_unchanged +d1482 2 +d1485 1 +a1485 51 +\change_deleted 0 1283307675 +There are three details which become important: +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1283307675 +On encountering a full bucket, we use the next bucket. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1283307675 +Extra hash bits are stored with the offset, to reduce comparisons. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1283307675 +A marker entry is used on deleting an entry. +\end_layout + +\begin_layout Standard + +\change_deleted 0 1283307675 +The doubling of the table must be done under a transaction; we will not + reduce it on deletion, so it will be an unusual case. + It will either be placed at the head (other entries will be moved out the + way so we can expand). + We could have a pointer in the header to the current hashtable location, + but that pointer would have to be read frequently to check for hashtable + moves. +\end_layout + +\begin_layout Standard + +\change_deleted 0 1283307675 +The locking for this is slightly more complex than the chained case; we + currently have one lock per bucket, and that means we would need to expand + the lock if we overflow to the next bucket. + The frequency of such collisions will effect our locking heuristics: we + can always lock more buckets than we need. 
+\end_layout + +\begin_layout Standard + +\change_deleted 0 1283307675 +One possible optimization is to only re-check the hash size on an insert + or a lookup miss. + +\change_inserted 0 1283307770 +a1492 2 + +\change_inserted 0 1283336187 +a1500 2 + +\change_inserted 0 1283336586 +a1510 2 +\change_unchanged + +d1636 3 +a1638 8 +Proposed Solution +\change_deleted 0 1283336858 + +\end_layout + +\begin_layout Standard +The first step is to remove all the current heuristics, as they obviously + interact, then examine them once the lock contention is addressed. +a1647 2 +\change_inserted 0 1283336910 + +a1650 2 + +\change_inserted 0 1283337052 +a1655 2 +\change_unchanged + +a1776 2 +\change_inserted 0 1283309850 + +a1779 2 + +\change_inserted 0 1283337216 +a1813 2 + +\change_inserted 0 1284424151 +a1825 2 +\change_unchanged + +a1830 2 +\change_unchanged + +a2031 2 + +\change_inserted 0 1283336739 +a2040 2 +\change_unchanged + +a2117 2 +\change_inserted 0 1283337133 + +a2120 2 + +\change_inserted 0 1283337139 +a2121 2 +\change_unchanged + +a2136 2 + +\change_inserted 0 1283337235 +a2147 2 +\change_unchanged + +d2251 1 +a2251 7 +Proposed Solution +\change_deleted 0 1284423472 + +\end_layout + +\begin_layout Standard +None. +d2261 1 +a2261 1 +\change_inserted 0 1284423891 +d2263 1 +a2263 4 +\change_deleted 0 1284423891 +. + +\change_inserted 0 1284423901 +a2271 2 +\change_unchanged + +a2293 2 +\change_inserted 0 1284423495 + +a2312 2 + +\change_inserted 0 1284424201 +d2321 1 +a2321 3 + +\change_unchanged +We could solve a small part of the problem by providing read-only transactions. +a2505 2 +\change_inserted 0 1284423555 + +a2508 2 + +\change_inserted 0 1284423617 +a2512 2 + +\change_inserted 0 1284423719 +a2519 2 + +\change_inserted 0 1284423864 +a2530 2 + +\change_inserted 0 1284423850 +a2540 2 +\change_unchanged + +@ + + +1.9 +log +@Extension mechanism. 
+@ +text +@d56 2 +a57 2 +\change_inserted 0 1284016854 +9-September +d479 11 +d1303 1 +a1303 1 +\change_inserted 0 1284016847 +d1310 56 +d1945 1 +a1945 1 +\change_inserted 0 1283310945 +d1956 2 +d2402 2 +d2416 4 +d2421 12 +d2455 2 +d2476 12 +d2673 47 +@ + + +1.8 +log +@Remove bogus footnote +@ +text +@d56 2 +a57 2 +\change_inserted 0 1283307544 +1-September +d838 12 +d1198 103 +@ + + +1.7 +log +@Moving hash table does not work. +@ +text +@a1436 12 +\begin_inset Foot +status collapsed + +\begin_layout Plain Layout + +\change_inserted 0 1283336450 +If we make the hash offsets zone-relative, then this only restricts the + zone size, not the overall database size. +\end_layout + +\end_inset + +@ + + +1.6 +log +@Commit changes +@ +text +@d38 1 +a38 1 +\author "" +d53 7 +a59 1 +26-July-2010 +d1333 10 +d1361 3 +a1363 1 + There are three details which become important: +d1367 2 +d1373 2 +d1379 2 +d1385 2 +d1397 2 +d1407 2 +d1411 45 +d1582 2 +d1598 14 +d1733 62 +d1996 13 +d2086 10 +d2110 15 +a2124 1 +\begin_layout LyX-Code +@ + + +1.5 +log +@Soft transaction commit +@ +text +@d38 1 +a38 1 +\author "Rusty Russell,,," +a52 4 + +\change_deleted 0 1280141199 +10-May-2010 +\change_inserted 0 1280141202 +a53 2 +\change_unchanged + +a2028 2 + +\change_inserted 0 1280140902 +a2034 2 + +\change_unchanged +a2212 2 +\change_inserted 0 1280140661 + +a2215 2 + +\change_inserted 0 1280140703 +a2219 2 + +\change_inserted 0 1280708312 +a2226 2 + +\change_inserted 0 1280708400 +a2239 2 + +\change_inserted 0 1280140836 +a2243 2 + +\change_inserted 0 1280708255 +a2247 2 + +\change_inserted 0 1280708374 +a2252 2 + +\change_inserted 0 1280141181 +a2274 2 + +\change_inserted 0 1280141345 +@ + + +1.4 +log +@Merge changes +@ +text +@d38 1 +a38 1 +\author "" +d53 2 +d56 4 +d2035 10 +d2223 84 +@ + + +1.3 +log +@Transaction and freelist rethink. 
+@ +text +@d38 1 +a38 1 +\author "Rusty Russell,,," +d53 1 +a53 1 +27-April-2010 +d662 1 +a662 5 + behavior of disallowing +\change_inserted 0 1272940179 +nested +\change_unchanged +transactions should become the default. +a1210 2 +\change_inserted 0 1272944650 + +a1214 2 + +\change_inserted 0 1272944763 +a1218 2 +\change_unchanged + +a1223 2 +\change_unchanged + +a1301 2 + +\change_inserted 0 1273478114 +a1310 2 +\change_unchanged + +d1515 1 +a1515 11 +The free list +\change_deleted 0 1273469807 +should +\change_inserted 0 1273469810 +must +\change_unchanged + be split +\change_deleted 0 1273469815 +into multiple lists +\change_unchanged +to reduce contention. +a1520 2 +\change_inserted 0 1273470006 + +a1523 2 + +\change_inserted 0 1273492055 +a1539 2 + +\change_inserted 0 1273483888 +a1551 2 +\change_unchanged + +a1554 8 + +\change_deleted 0 1272942055 +There are various ways to organize these lisys, but because we want to be + able to quickly identify which free list an entry is in, and reduce the + number of locks required for merging, we will use zoning (eg. + each free list covers some fixed fraction of the file). + +\change_inserted 0 1273484187 +d1556 1 +a1556 7 + +\change_deleted 0 1273484194 +The algorithm for f +\change_inserted 0 1273484194 +F +\change_unchanged +reeing is simple: +d1560 1 +a1560 7 +Identify the correct +\change_deleted 0 1273482856 +free list +\change_inserted 0 1273482857 +zone +\change_unchanged +. +d1564 1 +a1564 7 +Lock the +\change_inserted 0 1273482895 +corresponding +\change_unchanged +list +\change_inserted 0 1273482863 +. +a1567 2 + +\change_inserted 0 1273482909 +d1573 1 +a1573 13 + +\change_deleted 0 1273482885 +, and p +\change_inserted 0 1273482888 +P +\change_unchanged +lace the freed entry +\change_deleted 0 1273492415 +at the head +\change_inserted 0 1273492415 +in the list for that zone +\change_unchanged +. 
+d1577 2 +a1578 7 +Allocation is a little more complicated, as we +\change_deleted 0 1273483240 +merge entries as we walk the list: +\change_inserted 0 1273484250 +perform delayed coalescing at this point: +\change_unchanged + +d1582 1 +a1582 19 +Pick a +\change_deleted 0 1273482955 +free list; +\change_inserted 0 1273482957 +zone +\change_unchanged + either the +\change_deleted 0 1273482962 +list +\change_inserted 0 1273482962 +zone +\change_unchanged + we last freed +\change_deleted 0 1273482966 +o +\change_inserted 0 1273482966 +i +\change_unchanged +nto, or based on a +d1594 1 +a1594 9 +Lock th +\change_inserted 0 1273482980 +e corresponding +\change_deleted 0 1273482973 +at +\change_unchanged + list. +\change_inserted 0 1273482982 + +a1597 2 + +\change_inserted 0 1273483084 +a1598 53 +\change_unchanged + +\end_layout + +\begin_layout Enumerate +If the top entry is +\change_deleted 0 1273492155 +well-sized, +\change_inserted 0 1273492159 +-large enough, +\change_unchanged +remove it from the list and return it. +\end_layout + +\begin_layout Enumerate +Otherwise, +\change_inserted 0 1273492206 +coalesce entries in the list. +\change_deleted 0 1273492200 +examine the entry to the right of it in the file. + If it is free: +\end_layout + +\begin_deeper +\begin_layout Enumerate + +\change_deleted 0 1273492200 +If that entry is in a different list, lock that list too. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273492200 +If we had to place a new lock, re-check that the entry is free. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273492200 +Remove that entry from its free list and expand this entry to cover it. +\end_layout + +\begin_layout Enumerate + +\change_deleted 0 1273485554 +Goto step 3. +\end_layout + +\end_deeper +\begin_layout Enumerate + +\change_inserted 0 1273485311 +If there was no entry large enough, unlock the list and try the next zone. 
+d1602 1 +a1602 5 + +\change_deleted 0 1273483646 +Repeat step 3 with each entry in the list. +\change_unchanged + +d1606 2 +a1607 5 + +\change_deleted 0 1273483668 +Unlock the list and repeat step 2 with the next list. +\change_unchanged + +d1611 1 +a1611 7 +If no +\change_deleted 0 1273483671 +list +\change_inserted 0 1273483671 +zone +\change_unchanged + satisfies, expand the file. +d1615 2 +a1616 9 +This optimizes rapid insert/delete of free list entries +\change_inserted 0 1273485794 + by not coalescing them all the time. +\change_deleted 0 1273483685 +, and allows us to get rid of the tailer altogether +\change_unchanged +. + +\change_inserted 0 1273492299 +a1638 39 + +\change_deleted 0 1273476840 +The question of +\begin_inset Quotes eld +\end_inset + +well-sized +\begin_inset Quotes erd +\end_inset + + free entries is more difficult: the 25% overhead works in practice for + ldb because indexes tend to expand by one record at a time. + This can be resolved by having an +\begin_inset Quotes eld +\end_inset + +expanded +\begin_inset Quotes erd +\end_inset + + bit in the header to note entries that have previously expanded, and allocating + more space for them. + Whether the +\begin_inset Quotes eld +\end_inset + +increasing slack +\begin_inset Quotes erd +\end_inset + + algorithm should be implemented or first-fit used is still unknown: we + will determine this once these other ideas are implemented. 
+\change_inserted 0 1273483750 + +\end_layout + +\begin_layout Standard + +\change_inserted 0 1273492450 +a1644 2 + +\change_inserted 0 1273470441 +a1654 2 + +\change_inserted 0 1273476556 +a1659 2 + +\change_inserted 0 1273470423 +a1661 2 +\change_unchanged + +a1672 2 + +\change_inserted 0 1273476847 +a1676 2 + +\change_inserted 0 1273476886 +a1691 2 + +\change_inserted 0 1273477233 +a1699 2 + +\change_inserted 0 1273477534 +a1706 2 + +\change_inserted 0 1273482700 +a1712 2 + +\change_inserted 0 1273478079 +a1722 2 + +\change_inserted 0 1273477839 +a1726 2 + +\change_inserted 0 1273477925 +a1730 2 + +\change_inserted 0 1273477925 +a1734 2 + +\change_inserted 0 1273477925 +a1738 2 + +\change_inserted 0 1273477925 +a1742 2 + +\change_inserted 0 1273477925 +a1746 2 + +\change_inserted 0 1273477925 +a1750 2 + +\change_inserted 0 1273477925 +a1754 2 + +\change_inserted 0 1273477925 +a1758 2 + +\change_inserted 0 1273477925 +a1762 2 + +\change_inserted 0 1273477925 +a1766 2 + +\change_inserted 0 1273477925 +a1770 2 + +\change_inserted 0 1273477925 +a1774 2 + +\change_inserted 0 1273477925 +a1778 2 + +\change_inserted 0 1273477925 +a1782 2 + +\change_inserted 0 1273477925 +a1786 2 + +\change_inserted 0 1273477925 +a1790 2 + +\change_inserted 0 1273477925 +a1794 2 + +\change_inserted 0 1273477925 +a1798 2 + +\change_inserted 0 1273492522 +a1802 2 + +\change_inserted 0 1273492530 +a1806 2 + +\change_inserted 0 1273492546 +a1810 2 + +\change_inserted 0 1273478239 +a1814 2 + +\change_inserted 0 1273479960 +a1821 2 + +\change_inserted 0 1273480265 +a1830 2 + +\change_inserted 0 1273480354 +a1845 2 + +\change_inserted 0 1273478968 +a1851 2 + +\change_inserted 0 1273492604 +a1859 2 + +\change_inserted 0 1273479572 +a1862 2 +\change_unchanged + +a1870 2 + +\change_inserted 0 1273480282 +a1874 2 + +\change_inserted 0 1273478931 +a1878 2 + +\change_inserted 0 1273481549 +a1882 2 + +\change_inserted 0 1273481557 +a1886 2 + +\change_inserted 0 1273480307 +a1890 2 + +\change_inserted 
0 1273480335 +a1894 2 + +\change_inserted 0 1273479897 +a1898 2 + +\change_inserted 0 1273479653 +a1902 2 + +\change_inserted 0 1273480371 +a1906 2 + +\change_inserted 0 1273480464 +a1910 2 + +\change_inserted 0 1273480399 +a1914 2 + +\change_inserted 0 1273480425 +a1918 2 + +\change_inserted 0 1273480453 +a1922 2 + +\change_inserted 0 1273480455 +a1926 2 + +\change_inserted 0 1273480450 +a1930 2 + +\change_inserted 0 1273480452 +a1935 2 +\change_inserted 0 1273478830 + +a1942 5 + +\change_deleted 0 1273481604 +In theory, we could get away with 2: one after we write the new data, and + one to somehow atomically change over to it. +\change_inserted 0 1273481632 +a1946 2 + +\change_inserted 0 1273481724 +a1950 2 + +\change_inserted 0 1273481713 +a1954 2 + +\change_inserted 0 1273481717 +a1958 2 + +\change_inserted 0 1273481730 +a1962 2 + +\change_inserted 0 1273481736 +a1966 2 + +\change_inserted 0 1273481744 +a1970 2 + +\change_inserted 0 1273481748 +a1974 2 + +\change_inserted 0 1273482185 +a1978 2 + +\change_inserted 0 1273482259 +a1989 50 + +\change_deleted 0 1273481848 +None. + Trying to rewrite the transaction code is a separate experiment, which + I encourage someone else to do. + At some point you say +\begin_inset Quotes eld +\end_inset + +use a real database +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481848 +But as a thought experiment: +\change_unchanged + +\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481788 +Say there was a pointer in the header which said where the hash table and + free list tables were, and that no blocks were labeled with whether they + were free or not (it had to be derived from what list they were in). + We could create new hash table and free list in some free space, and populate + it as we want the post-committed state to look. + Then we sync, then we switch the offset in the header, then we sync again. 
+\end_layout + +\begin_layout Standard + +\change_deleted 0 1273481788 +This would not allow arbitrary changes to the database, such as tdb_repack + does, and would require more space (since we have to preserve the current + and future entries at once). + If we used hash trees rather than one big hash table, we might only have + to rewrite some sections of the hash, too. +\change_inserted 0 1273481854 + +\end_layout + +\begin_layout Standard + +\change_inserted 0 1273482102 +a1993 2 + +\change_inserted 0 1273482061 +a1998 2 + +\change_inserted 0 1273482063 +a2002 2 + +\change_inserted 0 1273482072 +a2006 2 + +\change_inserted 0 1273482139 +a2011 2 + +\change_inserted 0 1273482364 +a2015 2 + +\change_inserted 0 1273482163 +a2019 2 + +\change_inserted 0 1273482493 +a2037 2 + +\change_inserted 0 1273482536 +a2046 2 +\change_unchanged + +a2049 2 + +\change_inserted 0 1273482641 +a2058 2 + +\change_inserted 0 1273481827 +d2067 2 +a2068 11 +We could +\change_inserted 0 1273481829 +then +\change_unchanged +implement snapshots using a similar method +\change_deleted 0 1273481838 + to the above, only +\change_inserted 0 1273481840 +, +\change_unchanged + using multiple different hash tables/free tables. +@ + + +1.2 +log +@After first feedback (Ronnie & Volker) +@ +text +@d1314 13 +d1531 11 +a1541 1 +The free list should be split into multiple lists to reduce contention. +d1547 39 +d1596 7 +d1604 1 +a1604 1 +The algorithm for freeing is simple: +d1608 7 +a1614 1 +Identify the correct free list. +d1618 30 +a1647 1 +Lock the list, and place the freed entry at the head. +d1651 7 +a1657 2 +Allocation is a little more complicated, as we merge entries as we walk + the list: +d1661 19 +a1679 1 +Pick a free list; either the list we last freed onto, or based on a +d1691 17 +a1707 1 +Lock that list. +d1711 7 +a1717 1 +If the top entry is well-sized, remove it from the list and return it. +d1721 5 +a1725 1 +Otherwise, examine the entry to the right of it in the file. 
+d1731 2 +d1737 2 +d1743 2 +d1749 2 +d1756 8 +d1765 2 +d1770 2 +d1773 2 +d1778 7 +a1784 1 +If no list satisfies, expand the file. +d1788 28 +a1815 2 +This optimizes rapid insert/delete of free list entries, and allows us to + get rid of the tailer altogether. +d1819 2 +d1851 1 +a1851 1 +\change_inserted 0 1272941474 +d1857 303 +a2159 18 +\change_inserted 0 1272942759 +There are various ways to organize these lists, but because we want to be + able to quickly identify which free list an entry is in, and reduce the + number of locks required for merging, we will use zoning (eg. + each of the N free lists in a tdb file of size M covers a fixed fraction + M/N). + Note that this means we need to reshuffle the free lists when we expand + the file; this is probably acceptable when we double the hash table size, + since that is such an expensive operation already. + In the case of increasing the file size, there is an optimization we can + use: if we use M in the formula above as the file size rounded up to the + next power of 2, we only need reshuffle free lists when the file size crosses + a power of 2 boundary, +\emph on +and +\emph default +reshuffling the free lists is trivial: we simply merge every consecutive + pair of free lists. +d2164 107 +d2276 2 +d2280 59 +d2346 2 +d2363 2 +d2366 2 +d2371 2 +d2382 2 +d2389 57 +d2458 13 +d2474 32 +a2505 2 +We could implement snapshots using a similar method to the above, only using + multiple different hash tables/free tables. +@ + + +1.1 +log +@Initial revision +@ +text +@d1 1 +a1 1 +#LyX 1.6.4 created this file. For more info see http://www.lyx.org/ +d36 3 +a38 3 +\tracking_changes false +\output_changes false +\author "" +d662 5 +a666 1 + behavior of disallowing transactions should become the default. 
+d1215 21 +d1527 2 +d1533 3 +a1535 1 + The algorithm for freeing is simple: +d1642 26 +@ diff --git a/lib/tdb2/doc/design.pdf b/lib/tdb2/doc/design.pdf Binary files differnew file mode 100644 index 0000000000..558dc1f8c2 --- /dev/null +++ b/lib/tdb2/doc/design.pdf diff --git a/lib/tdb2/doc/design.txt b/lib/tdb2/doc/design.txt new file mode 100644 index 0000000000..bd2ffde4db --- /dev/null +++ b/lib/tdb2/doc/design.txt @@ -0,0 +1,1258 @@ +TDB2: Redesigning The Trivial DataBase + +Rusty Russell, IBM Corporation + +1-December-2010 + +Abstract + +The Trivial DataBase on-disk format is 32 bits; with usage cases +heading towards the 4G limit, that must change. This required +breakage provides an opportunity to revisit TDB's other design +decisions and reassess them. + +1 Introduction + +The Trivial DataBase was originally written by Andrew Tridgell as +a simple key/data pair storage system with the same API as dbm, +but allowing multiple readers and writers while being small +enough (< 1000 lines of C) to include in SAMBA. The simple design +created in 1999 has proven surprisingly robust and performant, +used in Samba versions 3 and 4 as well as numerous other +projects. Its useful life was greatly increased by the +(backwards-compatible!) addition of transaction support in 2005. + +The wider variety and greater demands of TDB-using code has led +to some organic growth of the API, as well as some compromises on +the implementation. None of these, by themselves, are seen as +show-stoppers, but the cumulative effect is a loss of elegance +over the initial, simple TDB implementation.
Here is a table of +the approximate number of lines of implementation code and number +of API functions at the end of each year: + + ++-----------+----------------+--------------------------------+ +| Year End | API Functions | Lines of C Code Implementation | ++-----------+----------------+--------------------------------+ ++-----------+----------------+--------------------------------+ +| 1999 | 13 | 1195 | ++-----------+----------------+--------------------------------+ +| 2000 | 24 | 1725 | ++-----------+----------------+--------------------------------+ +| 2001 | 32 | 2228 | ++-----------+----------------+--------------------------------+ +| 2002 | 35 | 2481 | ++-----------+----------------+--------------------------------+ +| 2003 | 35 | 2552 | ++-----------+----------------+--------------------------------+ +| 2004 | 40 | 2584 | ++-----------+----------------+--------------------------------+ +| 2005 | 38 | 2647 | ++-----------+----------------+--------------------------------+ +| 2006 | 52 | 3754 | ++-----------+----------------+--------------------------------+ +| 2007 | 66 | 4398 | ++-----------+----------------+--------------------------------+ +| 2008 | 71 | 4768 | ++-----------+----------------+--------------------------------+ +| 2009 | 73 | 5715 | ++-----------+----------------+--------------------------------+ + + +This review is an attempt to catalog and address all the known +issues with TDB and create solutions which address the problems +without significantly increasing complexity; all involved are far +too aware of the dangers of second system syndrome in rewriting a +successful project like this. + +2 API Issues + +2.1 tdb_open_ex Is Not Expandable + +The tdb_open() call was expanded to tdb_open_ex(), which added an +optional hashing function and an optional logging function +argument. Additional arguments to open would require the +introduction of a tdb_open_ex2 call etc. 
+ +2.1.1 Proposed Solution<attributes> + +tdb_open() will take a linked-list of attributes: + +enum tdb_attribute { + + TDB_ATTRIBUTE_LOG = 0, + + TDB_ATTRIBUTE_HASH = 1 + +}; + +struct tdb_attribute_base { + + enum tdb_attribute attr; + + union tdb_attribute *next; + +}; + +struct tdb_attribute_log { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG +*/ + + tdb_log_func log_fn; + + void *log_private; + +}; + +struct tdb_attribute_hash { + + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH +*/ + + tdb_hash_func hash_fn; + + void *hash_private; + +}; + +union tdb_attribute { + + struct tdb_attribute_base base; + + struct tdb_attribute_log log; + + struct tdb_attribute_hash hash; + +}; + +This allows future attributes to be added, even if this expands +the size of the union. + +2.1.2 Status + +Complete. + +2.2 tdb_traverse Makes Impossible Guarantees + +tdb_traverse (and tdb_firstkey/tdb_nextkey) predate transactions, +and it was thought that it was important to guarantee that all +records which exist at the start and end of the traversal would +be included, and no record would be included twice. + +This adds complexity (see[Reliable-Traversal-Adds]) and does not +work anyway for records which are altered (in particular, those +which are expanded may be effectively deleted and re-added behind +the traversal). + +2.2.1 <traverse-Proposed-Solution>Proposed Solution + +Abandon the guarantee. You will see every record if no changes +occur during your traversal, otherwise you will see some subset. +You can prevent changes by using a transaction or the locking +API. + +2.2.2 Status + +Complete. Delete-during-traverse will still delete every record, +too (assuming no other changes). + +2.3 Nesting of Transactions Is Fraught + +TDB has alternated between allowing nested transactions and not +allowing them. 
Various paths in the Samba codebase assume that +transactions will nest, and in a sense they can: the operation is +only committed to disk when the outer transaction is committed. +There are two problems, however: + +1. Canceling the inner transaction will cause the outer + transaction commit to fail, and will not undo any operations + since the inner transaction began. This problem is soluble with + some additional internal code. + +2. An inner transaction commit can be cancelled by the outer + transaction. This is desirable in the way which Samba's + database initialization code uses transactions, but could be a + surprise to any users expecting a successful transaction commit + to expose changes to others. + +The current solution is to specify the behavior at tdb_open(), +with the default currently that nested transactions are allowed. +This flag can also be changed at runtime. + +2.3.1 Proposed Solution + +Given the usage patterns, it seems that the “least-surprise” +behavior of disallowing nested transactions should become the +default. Additionally, it seems the outer transaction is the only +code which knows whether inner transactions should be allowed, so +a flag to indicate this could be added to tdb_transaction_start. +However, this behavior can be simulated with a wrapper which uses +tdb_add_flags() and tdb_remove_flags(), so the API should not be +expanded for this relatively-obscure case. + +2.3.2 Status + +Incomplete; nesting flag is still defined as per tdb1. + +2.4 Incorrect Hash Function is Not Detected + +tdb_open_ex() allows the calling code to specify a different hash +function to use, but does not check that all other processes +accessing this tdb are using the same hash function. The result +is that records are missing from tdb_fetch(). + +2.4.1 Proposed Solution + +The header should contain an example hash result (eg. 
the hash of +0xdeadbeef), and tdb_open_ex() should check that the given hash +function produces the same answer, or fail the tdb_open call. + +2.4.2 Status + +Complete. + +2.5 tdb_set_max_dead/TDB_VOLATILE Expose Implementation + +In response to scalability issues with the free list ([TDB-Freelist-Is] +) two API workarounds have been incorporated in TDB: +tdb_set_max_dead() and the TDB_VOLATILE flag to tdb_open. The +latter actually calls the former with an argument of “5”. + +This code allows deleted records to accumulate without putting +them in the free list. On delete we iterate through each chain +and free them in a batch if there are more than max_dead entries. +These are never otherwise recycled except as a side-effect of a +tdb_repack. + +2.5.1 Proposed Solution + +With the scalability problems of the freelist solved, this API +can be removed. The TDB_VOLATILE flag may still be useful as a +hint that store and delete of records will be at least as common +as fetch in order to allow some internal tuning, but initially +will become a no-op. + +2.5.2 Status + +Incomplete. TDB_VOLATILE still defined, but implementation should +fail on unknown flags to be future-proof. + +2.6 <TDB-Files-Cannot>TDB Files Cannot Be Opened Multiple Times + In The Same Process + +No process can open the same TDB twice; we check and disallow it. +This is an unfortunate side-effect of fcntl locks, which operate +on a per-file rather than per-file-descriptor basis, and do not +nest. Thus, closing any file descriptor on a file clears all the +locks obtained by this process, even if they were placed using a +different file descriptor! + +Note that even if this were solved, deadlock could occur if +operations were nested: this is a more manageable programming +error in most cases. + +2.6.1 Proposed Solution + +We could lobby POSIX to fix the perverse rules, or at least lobby +Linux to violate them so that the most common implementation does +not have this restriction. 
This would be a generally good idea +for other fcntl lock users. + +Samba uses a wrapper which hands out the same tdb_context to +multiple callers if this happens, and does simple reference +counting. We should do this inside the tdb library, which already +emulates lock nesting internally; it would need to recognize when +deadlock occurs within a single process. This would create a new +failure mode for tdb operations (while we currently handle +locking failures, they are impossible in normal use and a process +encountering them can do little but give up). + +I do not see benefit in an additional tdb_open flag to indicate +whether re-opening is allowed, although there may be some +benefit to adding a call to detect when a tdb_context is shared, +to allow others to create such an API. + +2.6.2 Status + +Incomplete. + +2.7 TDB API Is Not POSIX Thread-safe + +The TDB API uses an error code which can be queried after an +operation to determine what went wrong. This programming model +does not work with threads, unless specific additional guarantees +are given by the implementation. In addition, even +otherwise-independent threads cannot open the same TDB (as in [TDB-Files-Cannot] +). + +2.7.1 Proposed Solution + +Rearchitecting the API to include a tdb_errcode pointer would be a +great deal of churn; we are better to guarantee that the +tdb_errcode is per-thread so the current programming model can be +maintained. + +This requires dynamic per-thread allocations, which is awkward +with POSIX threads (pthread_key_create space is limited and we +cannot simply allocate a key for every TDB). + +Internal locking is required to make sure that fcntl locks do not +overlap between threads, and also that the global list of tdbs is +maintained. + +The aim is that building tdb with -DTDB_PTHREAD will result in a +pthread-safe version of the library, and otherwise no overhead +will exist.
Alternatively, a hooking mechanism similar to that +proposed for [Proposed-Solution-locking-hook] could be used to +enable pthread locking at runtime. + +2.7.2 Status + +Incomplete. + +2.8 *_nonblock Functions And *_mark Functions Expose + Implementation + +CTDB[footnote: +Clustered TDB, see http://ctdb.samba.org +] wishes to operate on TDB in a non-blocking manner. This is +currently done as follows: + +1. Call the _nonblock variant of an API function (eg. + tdb_lockall_nonblock). If this fails: + +2. Fork a child process, and wait for it to call the normal + variant (eg. tdb_lockall). + +3. If the child succeeds, call the _mark variant to indicate we + already have the locks (eg. tdb_lockall_mark). + +4. Upon completion, tell the child to release the locks (eg. + tdb_unlockall). + +5. Indicate to tdb that it should consider the locks removed (eg. + tdb_unlockall_mark). + +There are several issues with this approach. Firstly, adding two +new variants of each function clutters the API for an obscure +use, and so not all functions have three variants. Secondly, it +assumes that all paths of the functions ask for the same locks, +otherwise the parent process will have to get a lock which the +child doesn't have under some circumstances. I don't believe this +is currently the case, but it constrains the implementation. + +2.8.1 <Proposed-Solution-locking-hook>Proposed Solution + +Implement a hook for locking methods, so that the caller can +control the calls to create and remove fcntl locks. In this +scenario, ctdbd would operate as follows: + +1. Call the normal API function, eg tdb_lockall(). + +2. When the lock callback comes in, check if the child has the + lock. Initially, this is always false. If so, return 0. + Otherwise, try to obtain it in non-blocking mode. If that + fails, return EWOULDBLOCK. + +3. Release locks in the unlock callback as normal. + +4. If tdb_lockall() fails, see if we recorded a lock failure; if + so, call the child to repeat the operation. 
+ +5. The child records what locks it obtains, and returns that + information to the parent. + +6. When the child has succeeded, goto 1. + +This is flexible enough to handle any potential locking scenario, +even when lock requirements change. It can be optimized so that +the parent does not release locks, just tells the child which +locks it doesn't need to obtain. + +It also keeps the complexity out of the API, and in ctdbd where +it is needed. + +2.8.2 Status + +Incomplete. + +2.9 tdb_chainlock Functions Expose Implementation + +tdb_chainlock locks some number of records, including the record +indicated by the given key. This gave atomicity guarantees; +no-one can start a transaction, alter, read or delete that key +while the lock is held. + +It also makes the same guarantee for any other key in the chain, +which is an internal implementation detail and potentially a +cause for deadlock. + +2.9.1 Proposed Solution + +None. It would be nice to have an explicit single entry lock +which affected no other keys. Unfortunately, this won't work for +an entry which doesn't exist. Thus while chainlock may be +implemented more efficiently for the existing case, it will still +have overlap issues with the non-existing case. So it is best to +keep the current (lack of) guarantee about which records will be +affected to avoid constraining our implementation. + +2.10 Signal Handling is Not Race-Free + +The tdb_setalarm_sigptr() call allows the caller's signal handler +to indicate that the tdb locking code should return with a +failure, rather than trying again when a signal is received (and +errno == EAGAIN). This is usually used to implement timeouts. + +Unfortunately, this does not work in the case where the signal is +received before the tdb code enters the fcntl() call to place the +lock: the code will sleep within the fcntl() code, unaware that +the signal wants it to exit. In the case of long timeouts, this +does not happen in practice.
+ +2.10.1 Proposed Solution + +The locking hooks proposed in[Proposed-Solution-locking-hook] +would allow the user to decide on whether to fail the lock +acquisition on a signal. This allows the caller to choose their +own compromise: they could narrow the race by checking +immediately before the fcntl call.[footnote: +It may be possible to make this race-free in some implementations +by having the signal handler alter the struct flock to make it +invalid. This will cause the fcntl() lock call to fail with +EINVAL if the signal occurs before the kernel is entered, +otherwise EAGAIN. +] + +2.10.2 Status + +Incomplete. + +2.11 The API Uses Gratuitous Typedefs, Capitals + +typedefs are useful for providing source compatibility when types +can differ across implementations, or arguably in the case of +function pointer definitions which are hard for humans to parse. +Otherwise it is simply obfuscation and pollutes the namespace. + +Capitalization is usually reserved for compile-time constants and +macros. + + TDB_CONTEXT There is no reason to use this over 'struct + tdb_context'; the definition isn't visible to the API user + anyway. + + TDB_DATA There is no reason to use this over struct TDB_DATA; + the struct needs to be understood by the API user. + + struct TDB_DATA This would normally be called 'struct + tdb_data'. + + enum TDB_ERROR Similarly, this would normally be enum + tdb_error. + +2.11.1 Proposed Solution + +None. Introducing lower case variants would please pedants like +myself, but if it were done the existing ones should be kept. +There is little point forcing a purely cosmetic change upon tdb +users. + +2.12 <tdb_log_func-Doesnt-Take>tdb_log_func Doesn't Take The + Private Pointer + +For API compatibility reasons, the logging function needs to call +tdb_get_logging_private() to retrieve the pointer registered by +the tdb_open_ex for logging. 
+ +2.12.1 Proposed Solution + +It should simply take an extra argument, since we are prepared to +break the API/ABI. + +2.12.2 Status + +Complete. + +2.13 Various Callback Functions Are Not Typesafe + +The callback functions in tdb_set_logging_function (after [tdb_log_func-Doesnt-Take] + is resolved), tdb_parse_record, tdb_traverse, tdb_traverse_read +and tdb_check all take void * and must internally convert it to +the argument type they were expecting. + +If this type changes, the compiler will not produce warnings on +the callers, since it only sees void *. + +2.13.1 Proposed Solution + +With careful use of macros, we can create callback functions +which give a warning when used on gcc and the types of the +callback and its private argument differ. Unsupported compilers +will not give a warning, which is no worse than now. In addition, +the callbacks become clearer, as they need not use void * for +their parameter. + +See CCAN's typesafe_cb module at +http://ccan.ozlabs.org/info/typesafe_cb.html + +2.13.2 Status + +Incomplete. + +2.14 TDB_CLEAR_IF_FIRST Must Be Specified On All Opens, + tdb_reopen_all Problematic + +The TDB_CLEAR_IF_FIRST flag to tdb_open indicates that the TDB +file should be cleared if the caller discovers it is the only +process with the TDB open. However, if any caller does not +specify TDB_CLEAR_IF_FIRST it will not be detected, so will have +the TDB erased underneath them (usually resulting in a crash). + +There is a similar issue on fork(); if the parent exits (or +otherwise closes the tdb) before the child calls tdb_reopen_all() +to establish the lock used to indicate the TDB is opened by +someone, a TDB_CLEAR_IF_FIRST opener at that moment will believe +it alone has opened the TDB and will erase it. + +2.14.1 Proposed Solution + +Remove TDB_CLEAR_IF_FIRST. Other workarounds are possible, but +see [TDB_CLEAR_IF_FIRST-Imposes-Performance]. + +2.14.2 Status + +Incomplete, TDB_CLEAR_IF_FIRST still defined, but not +implemented. 
+ +2.15 Extending The Header Is Difficult + +We have reserved (zeroed) words in the TDB header, which can be +used for future features. If the future features are compulsory, +the version number must be updated to prevent old code from +accessing the database. But if the future feature is optional, we +have no way of telling if older code is accessing the database or +not. + +2.15.1 Proposed Solution + +The header should contain a “format variant” value (64-bit). This +is divided into two 32-bit parts: + +1. The lower part reflects the format variant understood by code + accessing the database. + +2. The upper part reflects the format variant you must understand + to write to the database (otherwise you can only open for + reading). + +The latter field can only be written at creation time, the former +should be written under the OPEN_LOCK when opening the database +for writing, if the variant of the code is lower than the current +lowest variant. + +This should allow backwards-compatible features to be added, and +detection if older code (which doesn't understand the feature) +writes to the database. + +2.15.2 Status + +Incomplete. + +2.16 Record Headers Are Not Expandable + +If we later want to add (say) checksums on keys and data, it +would require another format change, which we'd like to avoid. + +2.16.1 Proposed Solution + +We often have extra padding at the tail of a record. If we ensure +that the first byte (if any) of this padding is zero, we will +have a way for future changes to detect code which doesn't +understand a new format: the new code would write (say) a 1 at +the tail, and thus if there is no tail or the first byte is 0, we +would know the extension is not present on that record. + +2.16.2 Status + +Incomplete. + +2.17 TDB Does Not Use Talloc + +Many users of TDB (particularly Samba) use the talloc allocator, +and thus have to wrap TDB in a talloc context to use it +conveniently.
+ +2.17.1 Proposed Solution + +The allocation within TDB is not complicated enough to justify +the use of talloc, and I am reluctant to force another +(excellent) library on TDB users. Nonetheless a compromise is +possible. An attribute (see [attributes]) can be added later to +tdb_open() to provide an alternate allocation mechanism, +specifically for talloc but usable by any other allocator (which +would ignore the “context” argument). + +This would form a talloc hierarchy as expected, but the caller +would still have to attach a destructor to the tdb context +returned from tdb_open to close it. All TDB_DATA fields would be +children of the tdb_context, and the caller would still have to +manage them (using talloc_free() or talloc_steal()). + +2.17.2 Status + +Deferred. + +3 Performance And Scalability Issues + +3.1 <TDB_CLEAR_IF_FIRST-Imposes-Performance>TDB_CLEAR_IF_FIRST + Imposes Performance Penalty + +When TDB_CLEAR_IF_FIRST is specified, a 1-byte read lock is +placed at offset 4 (aka. the ACTIVE_LOCK). While these locks +never conflict in normal tdb usage, they do add substantial +overhead for most fcntl lock implementations when the kernel +scans to detect if a lock conflict exists. This is often a single +linked list, making the time to acquire and release a fcntl lock +O(N) where N is the number of processes with the TDB open, not +the number actually doing work. + +In a Samba server it is common to have huge numbers of clients +sitting idle, and thus they have weaned themselves off the +TDB_CLEAR_IF_FIRST flag.[footnote: +There is a flag to tdb_reopen_all() which is used for this +optimization: if the parent process will outlive the child, the +child does not need the ACTIVE_LOCK. This is a workaround for +this very performance issue. +] + +3.1.1 Proposed Solution + +Remove the flag. It was a neat idea, but even trivial servers +tend to know when they are initializing for the first time and +can simply unlink the old tdb at that point.
+ +3.1.2 Status + +Incomplete; TDB_CLEAR_IF_FIRST still defined, but does nothing. + +3.2 TDB Files Have a 4G Limit + +This seems to be becoming an issue (so much for “trivial”!), +particularly for ldb. + +3.2.1 Proposed Solution + +A new, incompatible TDB format which uses 64 bit offsets +internally rather than 32 bit as now. For simplicity of endian +conversion (which TDB does on the fly if required), all values +will be 64 bit on disk. In practice, some upper bits may be used +for other purposes, but at least 56 bits will be available for +file offsets. + +tdb_open() will automatically detect the old version, and even +create them if TDB_VERSION6 is specified to tdb_open. + +32 bit processes will still be able to access TDBs larger than 4G +(assuming that their off_t allows them to seek to 64 bits), they +will gracefully fall back as they fail to mmap. This can happen +already with large TDBs. + +Old versions of tdb will fail to open the new TDB files (since 28 +August 2009, commit 398d0c29290: prior to that any unrecognized +file format would be erased and initialized as a fresh tdb!) + +3.2.2 Status + +Complete. + +3.3 TDB Records Have a 4G Limit + +This has not been a reported problem, and the API uses size_t +which can be 64 bit on 64 bit platforms. However, other limits +may have made such an issue moot. + +3.3.1 Proposed Solution + +Record sizes will be 64 bit, with an error returned on 32 bit +platforms which try to access such records (the current +implementation would return TDB_ERR_OOM in a similar case). It +seems unlikely that 32 bit keys will be a limitation, so the +implementation may not support this (see [sub:Records-Incur-A]). + +3.3.2 Status + +Complete. + +3.4 Hash Size Is Determined At TDB Creation Time + +TDB contains a number of hash chains in the header; the number is +specified at creation time, and defaults to 131. This is such a +bottleneck on large databases (as each hash chain gets quite +long), that LDB uses 10,000 for this hash. 
In general it is +impossible to know what the 'right' answer is at database +creation time. + +3.4.1 <sub:Hash-Size-Solution>Proposed Solution + +After comprehensive performance testing on various scalable hash +variants[footnote: +http://rusty.ozlabs.org/?p=89 and http://rusty.ozlabs.org/?p=94 +This was annoying because I was previously convinced that an +expanding tree of hashes would be very close to optimal. +], it became clear that it is hard to beat a straight linear hash +table which doubles in size when it reaches saturation. +Unfortunately, altering the hash table introduces serious locking +complications: the entire hash table needs to be locked to +enlarge the hash table, and others might be holding locks. +Particularly insidious are insertions done under tdb_chainlock. + +Thus an expanding layered hash will be used: an array of hash +groups, with each hash group exploding into pointers to lower +hash groups once it fills, turning into a hash tree. This has +implications for locking: we must lock the entire group in case +we need to expand it, yet we don't know how deep the tree is at +that point. + +Note that bits from the hash table entries should be stolen to +hold more hash bits to reduce the penalty of collisions. We can +use the otherwise-unused lower 3 bits. If we limit the size of +the database to 64 exabytes, we can use the top 8 bits of the +hash entry as well. These 11 bits would reduce false positives +down to 1 in 2000 which is more than we need: we can use one of +the bits to indicate that the extra hash bits are valid. This +means we can choose not to re-hash all entries when we expand a +hash group; simply use the next bits we need and mark them +invalid. + +3.4.2 Status + +Complete. + +3.5 <TDB-Freelist-Is>TDB Freelist Is Highly Contended + +TDB uses a single linked list for the free list. Allocation +occurs as follows, using heuristics which have evolved over time: + +1. Get the free list lock for this whole operation. + +2. 
Multiply length by 1.25, so we always over-allocate by 25%. + +3. Set the slack multiplier to 1. + +4. Examine the current freelist entry: if it is > length but < + the current best case, remember it as the best case. + +5. Multiply the slack multiplier by 1.05. + +6. If our best fit so far is less than length * slack multiplier, + return it. The slack will be turned into a new free record if + it's large enough. + +7. Otherwise, go onto the next freelist entry. + +Deleting a record occurs as follows: + +1. Lock the hash chain for this whole operation. + +2. Walk the chain to find the record, keeping the prev pointer + offset. + +3. If max_dead is non-zero: + + (a) Walk the hash chain again and count the dead records. + + (b) If it's more than max_dead, bulk free all the dead ones + (similar to steps 4 and below, but the lock is only obtained + once). + + (c) Simply mark this record as dead and return. + +4. Get the free list lock for the remainder of this operation. + +5. <right-merging>Examine the following block to see if it is + free; if so, enlarge the current block and remove that block + from the free list. This was disabled, as removal from the free + list was O(entries-in-free-list). + +6. Examine the preceding block to see if it is free: for this + reason, each block has a 32-bit tailer which indicates its + length. If it is free, expand it to cover our new block and + return. + +7. Otherwise, prepend ourselves to the free list. + +Disabling right-merging (step [right-merging]) causes +fragmentation; the other heuristics proved insufficient to +address this, so the final answer to this was that when we expand +the TDB file inside a transaction commit, we repack the entire +tdb. + +The single list lock limits our allocation rate; due to the other +issues this is not currently seen as a bottleneck.
+ +3.5.1 Proposed Solution + +The first step is to remove all the current heuristics, as they +obviously interact, then examine them once the lock contention is +addressed. + +The free list must be split to reduce contention. Assuming +perfect free merging, we can at most have 1 free list entry for +each entry. This implies that the number of free lists is related +to the size of the hash table, but as it is rare to walk a large +number of free list entries we can use far fewer, say 1/32 of the +number of hash buckets. + +It seems tempting to try to reuse the hash implementation which +we use for records here, but we have two ways of searching for +free entries: for allocation we search by size (and possibly +zone) which produces too many clashes for our hash table to +handle well, and for coalescing we search by address. Thus an +array of doubly-linked free lists seems preferable. + +There are various benefits in using per-size free lists (see [sub:TDB-Becomes-Fragmented] +) but it's not clear this would reduce contention in the common +case where all processes are allocating/freeing the same size. +Thus we almost certainly need to divide in other ways: the most +obvious is to divide the file into zones, and using a free list +(or table of free lists) for each. This approximates address +ordering. + +Unfortunately it is difficult to know what heuristics should be +used to determine zone sizes, and our transaction code relies on +being able to create a “recovery area” by simply appending to the +file (difficult if it would need to create a new zone header). +Thus we use a linked-list of free tables; currently we only ever +create one, but if there is more than one we choose one at random +to use. In future we may use heuristics to add new free tables on +contention. We only expand the file when all free tables are +exhausted. + +The basic algorithm is as follows. Freeing is simple: + +1. Identify the correct free list. + +2. Lock the corresponding list. + +3. 
Re-check the list (we didn't have a lock, sizes could have + changed): relock if necessary. + +4. Place the freed entry in the list. + +Allocation is a little more complicated, as we perform delayed +coalescing at this point: + +1. Pick a free table; usually the previous one. + +2. Lock the corresponding list. + +3. If the top entry is large enough, remove it from the list and + return it. + +4. Otherwise, coalesce entries in the list. If there was no entry + large enough, unlock the list and try the next largest list. + +5. If no list has an entry which meets our needs, try the next + free table. + +6. If no zone satisfies, expand the file. + +This optimizes rapid insert/delete of free list entries by not +coalescing them all the time. First-fit address ordering +seems to be fairly good for keeping fragmentation low +(see [sub:TDB-Becomes-Fragmented]). Note that address ordering +does not need a tailer to coalesce, though if we needed one we +could have one cheaply: see [sub:Records-Incur-A]. + +Each free entry has the free table number in the header: less +than 255. It also contains a doubly-linked list for easy +deletion. + +3.6 <sub:TDB-Becomes-Fragmented>TDB Becomes Fragmented + +Much of this is a result of allocation strategy[footnote: +The Memory Fragmentation Problem: Solved? Johnstone & Wilson 1995 +ftp://ftp.cs.utexas.edu/pub/garbage/malloc/ismm98.ps +] and deliberate hobbling of coalescing; internal fragmentation +(aka overallocation) is deliberately set at 25%, and external +fragmentation is only cured by the decision to repack the entire +db when a transaction commit needs to enlarge the file. + +3.6.1 Proposed Solution + +The 25% overhead on allocation works in practice for ldb because +indexes tend to expand by one record at a time. This internal +fragmentation can be resolved by having an “expanded” bit in the +header to note entries that have previously expanded, and +allocating more space for them.
+ +There is a spectrum of possible solutions for external +fragmentation: one is to use a fragmentation-avoiding allocation +strategy such as best-fit address-order allocator. The other end +of the spectrum would be to use a bump allocator (very fast and +simple) and simply repack the file when we reach the end. + +There are three problems with efficient fragmentation-avoiding +allocators: they are non-trivial, they tend to use a single free +list for each size, and there's no evidence that tdb allocation +patterns will match those recorded for general allocators (though +it seems likely). + +Thus we don't spend too much effort on external fragmentation; we +will be no worse than the current code if we need to repack on +occasion. More effort is spent on reducing freelist contention, +and reducing overhead. + +3.7 <sub:Records-Incur-A>Records Incur A 28-Byte Overhead + +Each TDB record has a header as follows: + +struct tdb_record { + + tdb_off_t next; /* offset of the next record in the list +*/ + + tdb_len_t rec_len; /* total byte length of record */ + + tdb_len_t key_len; /* byte length of key */ + + tdb_len_t data_len; /* byte length of data */ + + uint32_t full_hash; /* the full 32 bit hash of the key */ + + uint32_t magic; /* try to catch errors */ + + /* the following union is implied: + + union { + + char record[rec_len]; + + struct { + + char key[key_len]; + + char data[data_len]; + + } + + uint32_t totalsize; (tailer) + + } + + */ + +}; + +Naively, this would double to a 56-byte overhead on a 64 bit +implementation. + +3.7.1 Proposed Solution + +We can use various techniques to reduce this for an allocated +block: + +1. The 'next' pointer is not required, as we are using a flat + hash table. + +2. 'rec_len' can instead be expressed as an addition to key_len + and data_len (it accounts for wasted or overallocated length in + the record).
Since the record length is always a multiple of 8, + we can conveniently fit it in 32 bits (representing up to 35 + bits). + +3. 'key_len' and 'data_len' can be reduced. I'm unwilling to + restrict 'data_len' to 32 bits, but instead we can combine the + two into one 64-bit field and using a 5 bit value which + indicates at what bit to divide the two. Keys are unlikely to + scale as fast as data, so I'm assuming a maximum key size of 32 + bits. + +4. 'full_hash' is used to avoid a memcmp on the “miss” case, but + this is diminishing returns after a handful of bits (at 10 + bits, it reduces 99.9% of false memcmp). As an aside, as the + lower bits are already incorporated in the hash table + resolution, the upper bits should be used here. Note that it's + not clear that these bits will be a win, given the extra bits + in the hash table itself (see [sub:Hash-Size-Solution]). + +5. 'magic' does not need to be enlarged: it currently reflects + one of 5 values (used, free, dead, recovery, and + unused_recovery). It is useful for quick sanity checking + however, and should not be eliminated. + +6. 'tailer' is only used to coalesce free blocks (so a block to + the right can find the header to check if this block is free). + This can be replaced by a single 'free' bit in the header of + the following block (and the tailer only exists in free + blocks).[footnote: +This technique from Thomas Standish. Data Structure Techniques. +Addison-Wesley, Reading, Massachusetts, 1980. +] The current proposed coalescing algorithm doesn't need this, + however. 
+ +This produces a 16 byte used header like this: + +struct tdb_used_record { + + uint32_t used_magic : 16, + + + + key_data_divide: 5, + + top_hash: 11; + + uint32_t extra_octets; + + uint64_t key_and_data_len; + +}; + +And a free record like this: + +struct tdb_free_record { + + uint64_t free_magic: 8, + + prev : 56; + + + + uint64_t free_table: 8, + + total_length : 56; + + uint64_t next; + +}; + +Note that by limiting valid offsets to 56 bits, we can pack +everything we need into 3 64-bit words, meaning our minimum +record size is 8 bytes. + +3.7.2 Status + +Complete. + +3.8 Transaction Commit Requires 4 fdatasync + +The current transaction algorithm is: + +1. write_recovery_data(); + +2. sync(); + +3. write_recovery_header(); + +4. sync(); + +5. overwrite_with_new_data(); + +6. sync(); + +7. remove_recovery_header(); + +8. sync(); + +On current ext3, each sync flushes all data to disk, so the next +3 syncs are relatively expensive. But this could become a +performance bottleneck on other filesystems such as ext4. + +3.8.1 Proposed Solution + +Neil Brown points out that this is overzealous, and only one sync +is needed: + +1. Bundle the recovery data, a transaction counter and a strong + checksum of the new data. + +2. Strong checksum that whole bundle. + +3. Store the bundle in the database. + +4. Overwrite the oldest of the two recovery pointers in the + header (identified using the transaction counter) with the + offset of this bundle. + +5. sync. + +6. Write the new data to the file. + +Checking for recovery means identifying the latest bundle with a +valid checksum and using the new data checksum to ensure that it +has been applied. This is more expensive than the current check, +but need only be done at open. For running databases, a separate +header field can be used to indicate a transaction in progress; +we need only check for recovery if this is set. + +3.8.2 Status + +Deferred.
+ +3.9 <sub:TDB-Does-Not>TDB Does Not Have Snapshot Support + +3.9.1 Proposed Solution + +None. At some point you say “use a real +database” (but see [replay-attribute]). + +But as a thought experiment, if we implemented transactions to +only overwrite free entries (this is tricky: there must not be a +header in each entry which indicates whether it is free, but use +of presence in metadata elsewhere), and a pointer to the hash +table, we could create an entirely new commit without destroying +existing data. Then it would be easy to implement snapshots in a +similar way. + +This would not allow arbitrary changes to the database, such as +tdb_repack does, and would require more space (since we have to +preserve the current and future entries at once). If we used hash +trees rather than one big hash table, we might only have to +rewrite some sections of the hash, too. + +We could then implement snapshots using a similar method, using +multiple different hash tables/free tables. + +3.9.2 Status + +Deferred. + +3.10 Transactions Cannot Operate in Parallel + +This would be useless for ldb, as it hits the index records with +just about every update. It would add significant complexity in +resolving clashes, and cause all the transaction callers to write +their code to loop in the case where the transactions spuriously +failed. + +3.10.1 Proposed Solution + +None (but see [replay-attribute]). We could solve a small part of +the problem by providing read-only transactions. These would +allow one write transaction to begin, but it could not commit +until all r/o transactions are done. This would require a new +RO_TRANSACTION_LOCK, which would be upgraded on commit. + +3.10.2 Status + +Deferred. + +3.11 Default Hash Function Is Suboptimal + +The Knuth-inspired multiplicative hash used by tdb is fairly slow +(especially if we expand it to 64 bits), and works best when the +hash bucket size is a prime number (which also means a slow +modulus).
In addition, it is highly predictable which could +potentially lead to a Denial of Service attack in some TDB uses. + +3.11.1 Proposed Solution + +The Jenkins lookup3 hash[footnote: +http://burtleburtle.net/bob/c/lookup3.c +] is a fast and superbly-mixing hash. It's used by the Linux +kernel and almost everything else. This has the particular +properties that it takes an initial seed, and produces two 32 bit +hash numbers, which we can combine into a 64-bit hash. + +The seed should be created at tdb-creation time from some random +source, and placed in the header. This is far from foolproof, but +adds a little bit of protection against hash bombing. + +3.11.2 Status + +Complete. + +3.12 <Reliable-Traversal-Adds>Reliable Traversal Adds Complexity + +We lock a record during traversal iteration, and try to grab that +lock in the delete code. If that grab on delete fails, we simply +mark it deleted and continue onwards; traversal checks for this +condition and does the delete when it moves off the record. + +If traversal terminates, the dead record may be left +indefinitely. + +3.12.1 Proposed Solution + +Remove reliability guarantees; see [traverse-Proposed-Solution]. + +3.12.2 Status + +Complete. + +3.13 Fcntl Locking Adds Overhead + +Placing a fcntl lock means a system call, as does removing one. +This is actually one reason why transactions can be faster +(everything is locked once at transaction start). In the +uncontended case, this overhead can theoretically be eliminated. + +3.13.1 Proposed Solution + +None. + +We tried this before with spinlock support, in the early days of +TDB, and it didn't make much difference except in manufactured +benchmarks. + +We could use spinlocks (with futex kernel support under Linux), +but it means that we lose automatic cleanup when a process dies +with a lock. There is a method of auto-cleanup under Linux, but +it's not supported by other operating systems. 
We could +reintroduce a clear-if-first-style lock and sweep for dead +futexes on open, but that wouldn't help the normal case of one +concurrent opener dying. Increasingly elaborate repair schemes +could be considered, but they require an ABI change (everyone +must use them) anyway, so there's no need to do this at the same +time as everything else. + +3.14 Some Transactions Don't Require Durability + +Volker points out that gencache uses a CLEAR_IF_FIRST tdb for +normal (fast) usage, and occasionally empties the results into a +transactional TDB. This kind of usage prioritizes performance +over durability: as long as we are consistent, data can be lost. + +This would be more neatly implemented inside tdb: a “soft” +transaction commit (ie. syncless) which meant that data may be +reverted on a crash. + +3.14.1 Proposed Solution + +None. + +Unfortunately any transaction scheme which overwrites old data +requires a sync before that overwrite to avoid the possibility of +corruption. + +It seems possible to use a scheme similar to that described in [sub:TDB-Does-Not] +,where transactions are committed without overwriting existing +data, and an array of top-level pointers were available in the +header. If the transaction is “soft” then we would not need a +sync at all: existing processes would pick up the new hash table +and free list and work with that. + +At some later point, a sync would allow recovery of the old data +into the free lists (perhaps when the array of top-level pointers +filled). On crash, tdb_open() would examine the array of top +levels, and apply the transactions until it encountered an +invalid checksum. + +3.15 Tracing Is Fragile, Replay Is External + +The current TDB has compile-time-enabled tracing code, but it +often breaks as it is not enabled by default. In a similar way, +the ctdb code has an external wrapper which does replay tracing +so it can coordinate cluster-wide transactions. 
+ +3.15.1 Proposed Solution<replay-attribute> + +Tridge points out that an attribute can be later added to +tdb_open (see [attributes]) to provide replay/trace hooks, which +could become the basis for this and future parallel transactions +and snapshot support. + +3.15.2 Status + +Deferred. diff --git a/lib/tdb2/free.c b/lib/tdb2/free.c new file mode 100644 index 0000000000..a770751dc0 --- /dev/null +++ b/lib/tdb2/free.c @@ -0,0 +1,968 @@ + /* + Trivial Database 2: free list/block handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "private.h" +#include <ccan/likely/likely.h> +#include <ccan/ilog/ilog.h> +#include <time.h> +#include <assert.h> +#include <limits.h> + +static unsigned fls64(uint64_t val) +{ + return ilog64(val); +} + +/* In which bucket would we find a particular record size? (ignoring header) */ +unsigned int size_to_bucket(tdb_len_t data_len) +{ + unsigned int bucket; + + /* We can't have records smaller than this. */ + assert(data_len >= TDB_MIN_DATA_LEN); + + /* Ignoring the header... */ + if (data_len - TDB_MIN_DATA_LEN <= 64) { + /* 0 in bucket 0, 8 in bucket 1... 64 in bucket 8. */ + bucket = (data_len - TDB_MIN_DATA_LEN) / 8; + } else { + /* After that we go power of 2. 
*/ + bucket = fls64(data_len - TDB_MIN_DATA_LEN) + 2; + } + + if (unlikely(bucket >= TDB_FREE_BUCKETS)) + bucket = TDB_FREE_BUCKETS - 1; + return bucket; +} + +tdb_off_t first_ftable(struct tdb_context *tdb) +{ + return tdb_read_off(tdb, offsetof(struct tdb_header, free_table)); +} + +tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable) +{ + return tdb_read_off(tdb, ftable + offsetof(struct tdb_freetable,next)); +} + +enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb) +{ + /* Use reservoir sampling algorithm to select a free list at random. */ + unsigned int rnd, max = 0, count = 0; + tdb_off_t off; + + tdb->ftable_off = off = first_ftable(tdb); + tdb->ftable = 0; + + while (off) { + if (TDB_OFF_IS_ERR(off)) { + return off; + } + + rnd = random(); + if (rnd >= max) { + tdb->ftable_off = off; + tdb->ftable = count; + max = rnd; + } + + off = next_ftable(tdb, off); + count++; + } + return TDB_SUCCESS; +} + +/* Offset of a given bucket. */ +tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket) +{ + return ftable_off + offsetof(struct tdb_freetable, buckets) + + bucket * sizeof(tdb_off_t); +} + +/* Returns free_buckets + 1, or list number to search, or -ve error. */ +static tdb_off_t find_free_head(struct tdb_context *tdb, + tdb_off_t ftable_off, + tdb_off_t bucket) +{ + /* Speculatively search for a non-zero bucket. 
*/ + return tdb_find_nonzero_off(tdb, bucket_off(ftable_off, 0), + bucket, TDB_FREE_BUCKETS); +} + +static void check_list(struct tdb_context *tdb, tdb_off_t b_off) +{ +#ifdef CCAN_TDB2_DEBUG + tdb_off_t off, prev = 0, first; + struct tdb_free_record r; + + first = off = (tdb_read_off(tdb, b_off) & TDB_OFF_MASK); + while (off != 0) { + tdb_read_convert(tdb, off, &r, sizeof(r)); + if (frec_magic(&r) != TDB_FREE_MAGIC) + abort(); + if (prev && frec_prev(&r) != prev) + abort(); + prev = off; + off = r.next; + } + + if (first) { + tdb_read_convert(tdb, first, &r, sizeof(r)); + if (frec_prev(&r) != prev) + abort(); + } +#endif +} + +/* Remove from free bucket. */ +static enum TDB_ERROR remove_from_list(struct tdb_context *tdb, + tdb_off_t b_off, tdb_off_t r_off, + const struct tdb_free_record *r) +{ + tdb_off_t off, prev_next, head; + enum TDB_ERROR ecode; + + /* Is this only element in list? Zero out bucket, and we're done. */ + if (frec_prev(r) == r_off) + return tdb_write_off(tdb, b_off, 0); + + /* off = &r->prev->next */ + off = frec_prev(r) + offsetof(struct tdb_free_record, next); + + /* Get prev->next */ + prev_next = tdb_read_off(tdb, off); + if (TDB_OFF_IS_ERR(prev_next)) + return prev_next; + + /* If prev->next == 0, we were head: update bucket to point to next. */ + if (prev_next == 0) { + /* We must preserve upper bits. */ + head = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(head)) + return head; + + if ((head & TDB_OFF_MASK) != r_off) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "remove_from_list:" + " %llu head %llu on list %llu", + (long long)r_off, + (long long)head, + (long long)b_off); + } + head = ((head & ~TDB_OFF_MASK) | r->next); + ecode = tdb_write_off(tdb, b_off, head); + if (ecode != TDB_SUCCESS) + return ecode; + } else { + /* r->prev->next = r->next */ + ecode = tdb_write_off(tdb, off, r->next); + if (ecode != TDB_SUCCESS) + return ecode; + } + + /* If we were the tail, off = &head->prev. 
*/ + if (r->next == 0) { + head = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(head)) + return head; + head &= TDB_OFF_MASK; + off = head + offsetof(struct tdb_free_record, magic_and_prev); + } else { + /* off = &r->next->prev */ + off = r->next + offsetof(struct tdb_free_record, + magic_and_prev); + } + +#ifdef CCAN_TDB2_DEBUG + /* *off == r */ + if ((tdb_read_off(tdb, off) & TDB_OFF_MASK) != r_off) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "remove_from_list:" + " %llu bad prev in list %llu", + (long long)r_off, (long long)b_off); + } +#endif + /* r->next->prev = r->prev */ + return tdb_write_off(tdb, off, r->magic_and_prev); +} + +/* Enqueue in this free bucket: sets coalesce if we've added 128 + * entries to it. */ +static enum TDB_ERROR enqueue_in_free(struct tdb_context *tdb, + tdb_off_t b_off, + tdb_off_t off, + tdb_len_t len, + bool *coalesce) +{ + struct tdb_free_record new; + enum TDB_ERROR ecode; + tdb_off_t prev, head; + uint64_t magic = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL)); + + head = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(head)) + return head; + + /* We only need to set ftable_and_len; rest is set in enqueue_in_free */ + new.ftable_and_len = ((uint64_t)tdb->ftable << (64 - TDB_OFF_UPPER_STEAL)) + | len; + + /* new->next = head. */ + new.next = (head & TDB_OFF_MASK); + + /* First element? Prev points to ourselves. */ + if (!new.next) { + new.magic_and_prev = (magic | off); + } else { + /* new->prev = next->prev */ + prev = tdb_read_off(tdb, + new.next + offsetof(struct tdb_free_record, + magic_and_prev)); + new.magic_and_prev = prev; + if (frec_magic(&new) != TDB_FREE_MAGIC) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "enqueue_in_free: %llu bad head" + " prev %llu", + (long long)new.next, + (long long)prev); + } + /* next->prev = new. 
*/ + ecode = tdb_write_off(tdb, new.next + + offsetof(struct tdb_free_record, + magic_and_prev), + off | magic); + if (ecode != TDB_SUCCESS) { + return ecode; + } + +#ifdef CCAN_TDB2_DEBUG + prev = tdb_read_off(tdb, frec_prev(&new) + + offsetof(struct tdb_free_record, next)); + if (prev != 0) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "enqueue_in_free:" + " %llu bad tail next ptr %llu", + (long long)frec_prev(&new) + + offsetof(struct tdb_free_record, + next), + (long long)prev); + } +#endif + } + + /* Update enqueue count, but don't set high bit: see TDB_OFF_IS_ERR */ + if (*coalesce) + head += (1ULL << (64 - TDB_OFF_UPPER_STEAL)); + head &= ~(TDB_OFF_MASK | (1ULL << 63)); + head |= off; + + ecode = tdb_write_off(tdb, b_off, head); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* It's time to coalesce if counter wrapped. */ + if (*coalesce) + *coalesce = ((head & ~TDB_OFF_MASK) == 0); + + return tdb_write_convert(tdb, off, &new, sizeof(new)); +} + +static tdb_off_t ftable_offset(struct tdb_context *tdb, unsigned int ftable) +{ + tdb_off_t off; + unsigned int i; + + if (likely(tdb->ftable == ftable)) + return tdb->ftable_off; + + off = first_ftable(tdb); + for (i = 0; i < ftable; i++) { + if (TDB_OFF_IS_ERR(off)) { + break; + } + off = next_ftable(tdb, off); + } + return off; +} + +/* Note: we unlock the current bucket if fail (-ve), or coalesce (+ve) and + * need to blatt the *protect record (which is set to an error). 
*/ +static tdb_len_t coalesce(struct tdb_context *tdb, + tdb_off_t off, tdb_off_t b_off, + tdb_len_t data_len, + tdb_off_t *protect) +{ + tdb_off_t end; + struct tdb_free_record rec; + enum TDB_ERROR ecode; + + tdb->stats.alloc_coalesce_tried++; + end = off + sizeof(struct tdb_used_record) + data_len; + + while (end < tdb->file->map_size) { + const struct tdb_free_record *r; + tdb_off_t nb_off; + unsigned ftable, bucket; + + r = tdb_access_read(tdb, end, sizeof(*r), true); + if (TDB_PTR_IS_ERR(r)) { + ecode = TDB_PTR_ERR(r); + goto err; + } + + if (frec_magic(r) != TDB_FREE_MAGIC + || frec_ftable(r) == TDB_FTABLE_NONE) { + tdb_access_release(tdb, r); + break; + } + + ftable = frec_ftable(r); + bucket = size_to_bucket(frec_len(r)); + nb_off = ftable_offset(tdb, ftable); + if (TDB_OFF_IS_ERR(nb_off)) { + tdb_access_release(tdb, r); + ecode = nb_off; + goto err; + } + nb_off = bucket_off(nb_off, bucket); + tdb_access_release(tdb, r); + + /* We may be violating lock order here, so best effort. */ + if (tdb_lock_free_bucket(tdb, nb_off, TDB_LOCK_NOWAIT) + != TDB_SUCCESS) { + tdb->stats.alloc_coalesce_lockfail++; + break; + } + + /* Now we have lock, re-check. */ + ecode = tdb_read_convert(tdb, end, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + tdb_unlock_free_bucket(tdb, nb_off); + goto err; + } + + if (unlikely(frec_magic(&rec) != TDB_FREE_MAGIC)) { + tdb->stats.alloc_coalesce_race++; + tdb_unlock_free_bucket(tdb, nb_off); + break; + } + + if (unlikely(frec_ftable(&rec) != ftable) + || unlikely(size_to_bucket(frec_len(&rec)) != bucket)) { + tdb->stats.alloc_coalesce_race++; + tdb_unlock_free_bucket(tdb, nb_off); + break; + } + + /* Did we just mess up a record you were hoping to use? 
*/ + if (end == *protect) { + tdb->stats.alloc_coalesce_iterate_clash++; + *protect = TDB_ERR_NOEXIST; + } + + ecode = remove_from_list(tdb, nb_off, end, &rec); + check_list(tdb, nb_off); + if (ecode != TDB_SUCCESS) { + tdb_unlock_free_bucket(tdb, nb_off); + goto err; + } + + end += sizeof(struct tdb_used_record) + frec_len(&rec); + tdb_unlock_free_bucket(tdb, nb_off); + tdb->stats.alloc_coalesce_num_merged++; + } + + /* Didn't find any adjacent free? */ + if (end == off + sizeof(struct tdb_used_record) + data_len) + return 0; + + /* Before we expand, check this isn't one you wanted protected? */ + if (off == *protect) { + *protect = TDB_ERR_EXISTS; + tdb->stats.alloc_coalesce_iterate_clash++; + } + + /* OK, expand initial record */ + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + goto err; + } + + if (frec_len(&rec) != data_len) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "coalesce: expected data len %zu not %zu", + (size_t)data_len, (size_t)frec_len(&rec)); + goto err; + } + + ecode = remove_from_list(tdb, b_off, off, &rec); + check_list(tdb, b_off); + if (ecode != TDB_SUCCESS) { + goto err; + } + + /* Try locking violation first. We don't allow coalesce recursion! */ + ecode = add_free_record(tdb, off, end - off, TDB_LOCK_NOWAIT, false); + if (ecode != TDB_SUCCESS) { + /* Need to drop lock. Can't rely on anything stable. */ + tdb->stats.alloc_coalesce_lockfail++; + *protect = TDB_ERR_CORRUPT; + + /* We have to drop this to avoid deadlocks, so make sure record + * doesn't get coalesced by someone else! 
*/ + rec.ftable_and_len = (TDB_FTABLE_NONE + << (64 - TDB_OFF_UPPER_STEAL)) + | (end - off - sizeof(struct tdb_used_record)); + ecode = tdb_write_off(tdb, + off + offsetof(struct tdb_free_record, + ftable_and_len), + rec.ftable_and_len); + if (ecode != TDB_SUCCESS) { + goto err; + } + + tdb_unlock_free_bucket(tdb, b_off); + + ecode = add_free_record(tdb, off, end - off, TDB_LOCK_WAIT, + false); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } else if (TDB_OFF_IS_ERR(*protect)) { + /* For simplicity, we always drop lock if they can't continue */ + tdb_unlock_free_bucket(tdb, b_off); + } + tdb->stats.alloc_coalesce_succeeded++; + + /* Return usable length. */ + return end - off - sizeof(struct tdb_used_record); + +err: + /* To unify error paths, we *always* unlock bucket on error. */ + tdb_unlock_free_bucket(tdb, b_off); + return ecode; +} + +/* List is locked: we unlock it. */ +static enum TDB_ERROR coalesce_list(struct tdb_context *tdb, + tdb_off_t ftable_off, + tdb_off_t b_off, + unsigned int limit) +{ + enum TDB_ERROR ecode; + tdb_off_t off; + + off = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(off)) { + ecode = off; + goto unlock_err; + } + /* A little bit of paranoia: counter should be 0. */ + off &= TDB_OFF_MASK; + + while (off && limit--) { + struct tdb_free_record rec; + tdb_len_t coal; + tdb_off_t next; + + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + next = rec.next; + coal = coalesce(tdb, off, b_off, frec_len(&rec), &next); + if (TDB_OFF_IS_ERR(coal)) { + /* This has already unlocked on error. */ + return coal; + } + if (TDB_OFF_IS_ERR(next)) { + /* Coalescing had to unlock, so stop. */ + return TDB_SUCCESS; + } + /* Keep going if we're doing well... */ + limit += size_to_bucket(coal / 16 + TDB_MIN_DATA_LEN); + off = next; + } + + /* Now, move those elements to the tail of the list so we get something + * else next time. 
*/ + if (off) { + struct tdb_free_record oldhrec, newhrec, oldtrec, newtrec; + tdb_off_t oldhoff, oldtoff, newtoff; + + /* The record we were up to is the new head. */ + ecode = tdb_read_convert(tdb, off, &newhrec, sizeof(newhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the new tail. */ + newtoff = frec_prev(&newhrec); + ecode = tdb_read_convert(tdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the old head. */ + oldhoff = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(oldhoff)) { + ecode = oldhoff; + goto unlock_err; + } + + /* This could happen if they all coalesced away. */ + if (oldhoff == off) + goto out; + + ecode = tdb_read_convert(tdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Get the old tail. */ + oldtoff = frec_prev(&oldhrec); + ecode = tdb_read_convert(tdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* Old tail's next points to old head. */ + oldtrec.next = oldhoff; + + /* Old head's prev points to old tail. */ + oldhrec.magic_and_prev + = (TDB_FREE_MAGIC << (64 - TDB_OFF_UPPER_STEAL)) + | oldtoff; + + /* New tail's next is 0. */ + newtrec.next = 0; + + /* Write out the modified versions. */ + ecode = tdb_write_convert(tdb, oldtoff, &oldtrec, + sizeof(oldtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + ecode = tdb_write_convert(tdb, oldhoff, &oldhrec, + sizeof(oldhrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + ecode = tdb_write_convert(tdb, newtoff, &newtrec, + sizeof(newtrec)); + if (ecode != TDB_SUCCESS) + goto unlock_err; + + /* And finally link in new head. */ + ecode = tdb_write_off(tdb, b_off, off); + if (ecode != TDB_SUCCESS) + goto unlock_err; + } +out: + tdb_unlock_free_bucket(tdb, b_off); + return TDB_SUCCESS; + +unlock_err: + tdb_unlock_free_bucket(tdb, b_off); + return ecode; +} + +/* List must not be locked if coalesce_ok is set. 
*/ +enum TDB_ERROR add_free_record(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len_with_header, + enum tdb_lock_flags waitflag, + bool coalesce) +{ + tdb_off_t b_off; + tdb_len_t len; + enum TDB_ERROR ecode; + + assert(len_with_header >= sizeof(struct tdb_free_record)); + + len = len_with_header - sizeof(struct tdb_used_record); + + b_off = bucket_off(tdb->ftable_off, size_to_bucket(len)); + ecode = tdb_lock_free_bucket(tdb, b_off, waitflag); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + ecode = enqueue_in_free(tdb, b_off, off, len, &coalesce); + check_list(tdb, b_off); + + /* Coalescing unlocks free list. */ + if (!ecode && coalesce) + ecode = coalesce_list(tdb, tdb->ftable_off, b_off, 2); + else + tdb_unlock_free_bucket(tdb, b_off); + return ecode; +} + +static size_t adjust_size(size_t keylen, size_t datalen) +{ + size_t size = keylen + datalen; + + if (size < TDB_MIN_DATA_LEN) + size = TDB_MIN_DATA_LEN; + + /* Round to next uint64_t boundary. */ + return (size + (sizeof(uint64_t) - 1ULL)) & ~(sizeof(uint64_t) - 1ULL); +} + +/* If we have enough left over to be useful, split that off. */ +static size_t record_leftover(size_t keylen, size_t datalen, + bool want_extra, size_t total_len) +{ + ssize_t leftover; + + if (want_extra) + datalen += datalen / 2; + leftover = total_len - adjust_size(keylen, datalen); + + if (leftover < (ssize_t)sizeof(struct tdb_free_record)) + return 0; + + return leftover; +} + +/* We need size bytes to put our key and data in. */ +static tdb_off_t lock_and_alloc(struct tdb_context *tdb, + tdb_off_t ftable_off, + tdb_off_t bucket, + size_t keylen, size_t datalen, + bool want_extra, + unsigned magic, + unsigned hashlow) +{ + tdb_off_t off, b_off,best_off; + struct tdb_free_record best = { 0 }; + double multiplier; + size_t size = adjust_size(keylen, datalen); + enum TDB_ERROR ecode; + + tdb->stats.allocs++; + b_off = bucket_off(ftable_off, bucket); + + /* FIXME: Try non-blocking wait first, to measure contention. 
*/ + /* Lock this bucket. */ + ecode = tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + best.ftable_and_len = -1ULL; + best_off = 0; + + /* Get slack if we're after extra. */ + if (want_extra) + multiplier = 1.5; + else + multiplier = 1.0; + + /* Walk the list to see if any are large enough, getting less fussy + * as we go. */ + off = tdb_read_off(tdb, b_off); + if (TDB_OFF_IS_ERR(off)) { + ecode = off; + goto unlock_err; + } + off &= TDB_OFF_MASK; + + while (off) { + const struct tdb_free_record *r; + tdb_len_t len; + tdb_off_t next; + + r = tdb_access_read(tdb, off, sizeof(*r), true); + if (TDB_PTR_IS_ERR(r)) { + ecode = TDB_PTR_ERR(r); + goto unlock_err; + } + + if (frec_magic(r) != TDB_FREE_MAGIC) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "lock_and_alloc:" + " %llu non-free 0x%llx", + (long long)off, + (long long)r->magic_and_prev); + tdb_access_release(tdb, r); + goto unlock_err; + } + + if (frec_len(r) >= size && frec_len(r) < frec_len(&best)) { + best_off = off; + best = *r; + } + + if (frec_len(&best) <= size * multiplier && best_off) { + tdb_access_release(tdb, r); + break; + } + + multiplier *= 1.01; + + next = r->next; + len = frec_len(r); + tdb_access_release(tdb, r); + off = next; + } + + /* If we found anything at all, use it. */ + if (best_off) { + struct tdb_used_record rec; + size_t leftover; + + /* We're happy with this size: take it. */ + ecode = remove_from_list(tdb, b_off, best_off, &best); + check_list(tdb, b_off); + if (ecode != TDB_SUCCESS) { + goto unlock_err; + } + + leftover = record_leftover(keylen, datalen, want_extra, + frec_len(&best)); + + assert(keylen + datalen + leftover <= frec_len(&best)); + /* We need to mark non-free before we drop lock, otherwise + * coalesce() could try to merge it! 
*/ + ecode = set_header(tdb, &rec, magic, keylen, datalen, + frec_len(&best) - leftover, hashlow); + if (ecode != TDB_SUCCESS) { + goto unlock_err; + } + + ecode = tdb_write_convert(tdb, best_off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + goto unlock_err; + } + + /* For futureproofing, we put a 0 in any unused space. */ + if (rec_extra_padding(&rec)) { + ecode = tdb->methods->twrite(tdb, best_off + sizeof(rec) + + keylen + datalen, "", 1); + if (ecode != TDB_SUCCESS) { + goto unlock_err; + } + } + + /* Bucket of leftover will be <= current bucket, so nested + * locking is allowed. */ + if (leftover) { + tdb->stats.alloc_leftover++; + ecode = add_free_record(tdb, + best_off + sizeof(rec) + + frec_len(&best) - leftover, + leftover, TDB_LOCK_WAIT, false); + if (ecode != TDB_SUCCESS) { + best_off = ecode; + } + } + tdb_unlock_free_bucket(tdb, b_off); + + return best_off; + } + + tdb_unlock_free_bucket(tdb, b_off); + return 0; + +unlock_err: + tdb_unlock_free_bucket(tdb, b_off); + return ecode; +} + +/* Get a free block from current free list, or 0 if none, -ve on error. */ +static tdb_off_t get_free(struct tdb_context *tdb, + size_t keylen, size_t datalen, bool want_extra, + unsigned magic, unsigned hashlow) +{ + tdb_off_t off, ftable_off; + tdb_off_t start_b, b, ftable; + bool wrapped = false; + + /* If they are growing, add 50% to get to higher bucket. */ + if (want_extra) + start_b = size_to_bucket(adjust_size(keylen, + datalen + datalen / 2)); + else + start_b = size_to_bucket(adjust_size(keylen, datalen)); + + ftable_off = tdb->ftable_off; + ftable = tdb->ftable; + while (!wrapped || ftable_off != tdb->ftable_off) { + /* Start at exact size bucket, and search up... */ + for (b = find_free_head(tdb, ftable_off, start_b); + b < TDB_FREE_BUCKETS; + b = find_free_head(tdb, ftable_off, b + 1)) { + /* Try getting one from list. 
*/ + off = lock_and_alloc(tdb, ftable_off, + b, keylen, datalen, want_extra, + magic, hashlow); + if (TDB_OFF_IS_ERR(off)) + return off; + if (off != 0) { + if (b == start_b) + tdb->stats.alloc_bucket_exact++; + if (b == TDB_FREE_BUCKETS - 1) + tdb->stats.alloc_bucket_max++; + /* Worked? Stay using this list. */ + tdb->ftable_off = ftable_off; + tdb->ftable = ftable; + return off; + } + /* Didn't work. Try next bucket. */ + } + + if (TDB_OFF_IS_ERR(b)) { + return b; + } + + /* Hmm, try next table. */ + ftable_off = next_ftable(tdb, ftable_off); + if (TDB_OFF_IS_ERR(ftable_off)) { + return ftable_off; + } + ftable++; + + if (ftable_off == 0) { + wrapped = true; + ftable_off = first_ftable(tdb); + if (TDB_OFF_IS_ERR(ftable_off)) { + return ftable_off; + } + ftable = 0; + } + } + + return 0; +} + +enum TDB_ERROR set_header(struct tdb_context *tdb, + struct tdb_used_record *rec, + unsigned magic, uint64_t keylen, uint64_t datalen, + uint64_t actuallen, unsigned hashlow) +{ + uint64_t keybits = (fls64(keylen) + 1) / 2; + + /* Use bottom bits of hash, so it's independent of hash table size. */ + rec->magic_and_meta = (hashlow & ((1 << 11)-1)) + | ((actuallen - (keylen + datalen)) << 11) + | (keybits << 43) + | ((uint64_t)magic << 48); + rec->key_and_data_len = (keylen | (datalen << (keybits*2))); + + /* Encoding can fail on big values. */ + if (rec_key_length(rec) != keylen + || rec_data_length(rec) != datalen + || rec_extra_padding(rec) != actuallen - (keylen + datalen)) { + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "Could not encode k=%llu,d=%llu,a=%llu", + (long long)keylen, (long long)datalen, + (long long)actuallen); + } + return TDB_SUCCESS; +} + +/* Expand the database. */ +static enum TDB_ERROR tdb_expand(struct tdb_context *tdb, tdb_len_t size) +{ + uint64_t old_size, rec_size, map_size; + tdb_len_t wanted; + enum TDB_ERROR ecode; + + /* Need to hold a hash lock to expand DB: transactions rely on it. 
*/ + if (!(tdb->flags & TDB_NOLOCK) + && !tdb->file->allrecord_lock.count && !tdb_has_hash_locks(tdb)) { + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_expand: must hold lock during expand"); + } + + /* Only one person can expand file at a time. */ + ecode = tdb_lock_expand(tdb, F_WRLCK); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* Someone else may have expanded the file, so retry. */ + old_size = tdb->file->map_size; + tdb->methods->oob(tdb, tdb->file->map_size + 1, true); + if (tdb->file->map_size != old_size) { + tdb_unlock_expand(tdb, F_WRLCK); + return TDB_SUCCESS; + } + + /* limit size in order to avoid using up huge amounts of memory for + * in memory tdbs if an oddball huge record creeps in */ + if (size > 100 * 1024) { + rec_size = size * 2; + } else { + rec_size = size * 100; + } + + /* always make room for at least rec_size more records, and at + least 25% more space. if the DB is smaller than 100MiB, + otherwise grow it by 10% only. */ + if (old_size > 100 * 1024 * 1024) { + map_size = old_size / 10; + } else { + map_size = old_size / 4; + } + + if (map_size > rec_size) { + wanted = map_size; + } else { + wanted = rec_size; + } + + /* We need room for the record header too. */ + wanted = adjust_size(0, sizeof(struct tdb_used_record) + wanted); + + ecode = tdb->methods->expand_file(tdb, wanted); + if (ecode != TDB_SUCCESS) { + tdb_unlock_expand(tdb, F_WRLCK); + return ecode; + } + + /* We need to drop this lock before adding free record. */ + tdb_unlock_expand(tdb, F_WRLCK); + + tdb->stats.expands++; + return add_free_record(tdb, old_size, wanted, TDB_LOCK_WAIT, true); +} + +/* This won't fail: it will expand the database if it has to. */ +tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, + uint64_t hash, unsigned magic, bool growing) +{ + tdb_off_t off; + + /* We can't hold pointers during this: we could unmap! 
*/ + assert(!tdb->direct_access); + + for (;;) { + enum TDB_ERROR ecode; + off = get_free(tdb, keylen, datalen, growing, magic, hash); + if (likely(off != 0)) + break; + + ecode = tdb_expand(tdb, adjust_size(keylen, datalen)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + + return off; +} diff --git a/lib/tdb2/hash.c b/lib/tdb2/hash.c new file mode 100644 index 0000000000..1359cfecd6 --- /dev/null +++ b/lib/tdb2/hash.c @@ -0,0 +1,881 @@ + /* + Trivial Database 2: hash handling + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "private.h" +#include <assert.h> + +uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len) +{ + return tdb->hash_fn(ptr, len, tdb->hash_seed, tdb->hash_data); +} + +uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off) +{ + const struct tdb_used_record *r; + const void *key; + uint64_t klen, hash; + + r = tdb_access_read(tdb, off, sizeof(*r), true); + if (TDB_PTR_IS_ERR(r)) { + /* FIXME */ + return 0; + } + + klen = rec_key_length(r); + tdb_access_release(tdb, r); + + key = tdb_access_read(tdb, off + sizeof(*r), klen, false); + if (TDB_PTR_IS_ERR(key)) { + return 0; + } + + hash = tdb_hash(tdb, key, klen); + tdb_access_release(tdb, key); + return hash; +} + +/* Get bits from a value. 
*/ +static uint32_t bits_from(uint64_t val, unsigned start, unsigned num) +{ + assert(num <= 32); + return (val >> start) & ((1U << num) - 1); +} + +/* We take bits from the top: that way we can lock whole sections of the hash + * by using lock ranges. */ +static uint32_t use_bits(struct hash_info *h, unsigned num) +{ + h->hash_used += num; + return bits_from(h->h, 64 - h->hash_used, num); +} + +static tdb_bool_err key_matches(struct tdb_context *tdb, + const struct tdb_used_record *rec, + tdb_off_t off, + const struct tdb_data *key) +{ + tdb_bool_err ret = false; + const char *rkey; + + if (rec_key_length(rec) != key->dsize) { + tdb->stats.compare_wrong_keylen++; + return ret; + } + + rkey = tdb_access_read(tdb, off + sizeof(*rec), key->dsize, false); + if (TDB_PTR_IS_ERR(rkey)) { + return TDB_PTR_ERR(rkey); + } + if (memcmp(rkey, key->dptr, key->dsize) == 0) + ret = true; + else + tdb->stats.compare_wrong_keycmp++; + tdb_access_release(tdb, rkey); + return ret; +} + +/* Does entry match? */ +static tdb_bool_err match(struct tdb_context *tdb, + struct hash_info *h, + const struct tdb_data *key, + tdb_off_t val, + struct tdb_used_record *rec) +{ + tdb_off_t off; + enum TDB_ERROR ecode; + + tdb->stats.compares++; + /* Desired bucket must match. */ + if (h->home_bucket != (val & TDB_OFF_HASH_GROUP_MASK)) { + tdb->stats.compare_wrong_bucket++; + return false; + } + + /* Top bits of offset == next bits of hash. 
*/ + if (bits_from(val, TDB_OFF_HASH_EXTRA_BIT, TDB_OFF_UPPER_STEAL_EXTRA) + != bits_from(h->h, 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA, + TDB_OFF_UPPER_STEAL_EXTRA)) { + tdb->stats.compare_wrong_offsetbits++; + return false; + } + + off = val & TDB_OFF_MASK; + ecode = tdb_read_convert(tdb, off, rec, sizeof(*rec)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + if ((h->h & ((1 << 11)-1)) != rec_hash(rec)) { + tdb->stats.compare_wrong_rechash++; + return false; + } + + return key_matches(tdb, rec, off, key); +} + +static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned bucket) +{ + return group_start + + (bucket % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t); +} + +bool is_subhash(tdb_off_t val) +{ + return (val >> TDB_OFF_UPPER_STEAL_SUBHASH_BIT) & 1; +} + +/* FIXME: Guess the depth, don't over-lock! */ +static tdb_off_t hlock_range(tdb_off_t group, tdb_off_t *size) +{ + *size = 1ULL << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS)); + return group << (64 - (TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS)); +} + +static tdb_off_t COLD find_in_chain(struct tdb_context *tdb, + struct tdb_data key, + tdb_off_t chain, + struct hash_info *h, + struct tdb_used_record *rec, + struct traverse_info *tinfo) +{ + tdb_off_t off, next; + enum TDB_ERROR ecode; + + /* In case nothing is free, we set these to zero. */ + h->home_bucket = h->found_bucket = 0; + + for (off = chain; off; off = next) { + unsigned int i; + + h->group_start = off; + ecode = tdb_read_convert(tdb, off, h->group, sizeof(h->group)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) { + tdb_off_t recoff; + if (!h->group[i]) { + /* Remember this empty bucket. */ + h->home_bucket = h->found_bucket = i; + continue; + } + + /* We can insert extra bits via add_to_hash + * empty bucket logic. 
*/ + recoff = h->group[i] & TDB_OFF_MASK; + ecode = tdb_read_convert(tdb, recoff, rec, + sizeof(*rec)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + ecode = key_matches(tdb, rec, recoff, &key); + if (ecode < 0) { + return ecode; + } + if (ecode == 1) { + h->home_bucket = h->found_bucket = i; + + if (tinfo) { + tinfo->levels[tinfo->num_levels] + .hashtable = off; + tinfo->levels[tinfo->num_levels] + .total_buckets + = 1 << TDB_HASH_GROUP_BITS; + tinfo->levels[tinfo->num_levels].entry + = i; + tinfo->num_levels++; + } + return recoff; + } + } + next = tdb_read_off(tdb, off + + offsetof(struct tdb_chain, next)); + if (TDB_OFF_IS_ERR(next)) { + return next; + } + if (next) + next += sizeof(struct tdb_used_record); + } + return 0; +} + +/* This is the core routine which searches the hashtable for an entry. + * On error, no locks are held and -ve is returned. + * Otherwise, hinfo is filled in (and the optional tinfo). + * If not found, the return value is 0. + * If found, the return value is the offset, and *rec is the record. 
*/
/* The hash lock for the key's top-level group is taken with mode `ltype`
 * and is still held on every non-error return; every error path after the
 * lock is taken goes through the "fail" label, which releases it. */
tdb_off_t find_and_lock(struct tdb_context *tdb,
			struct tdb_data key,
			int ltype,
			struct hash_info *h,
			struct tdb_used_record *rec,
			struct traverse_info *tinfo)
{
	uint32_t i, group;
	tdb_off_t hashtable, off;
	enum TDB_ERROR ecode;

	h->h = tdb_hash(tdb, key.dptr, key.dsize);
	h->hash_used = 0;
	group = use_bits(h, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS);
	h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);

	h->hlock_start = hlock_range(group, &h->hlock_range);
	ecode = tdb_lock_hashes(tdb, h->hlock_start, h->hlock_range, ltype,
				TDB_LOCK_WAIT);
	if (ecode != TDB_SUCCESS) {
		return ecode;
	}

	hashtable = offsetof(struct tdb_header, hashtable);
	if (tinfo) {
		tinfo->toplevel_group = group;
		tinfo->num_levels = 1;
		tinfo->levels[0].entry = 0;
		tinfo->levels[0].hashtable = hashtable
			+ (group << TDB_HASH_GROUP_BITS) * sizeof(tdb_off_t);
		tinfo->levels[0].total_buckets = 1 << TDB_HASH_GROUP_BITS;
	}

	while (h->hash_used <= 64) {
		/* Read in the hash group. */
		h->group_start = hashtable
			+ group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);

		ecode = tdb_read_convert(tdb, h->group_start, &h->group,
					 sizeof(h->group));
		if (ecode != TDB_SUCCESS) {
			goto fail;
		}

		/* Pointer to another hash table?  Go down... */
		if (is_subhash(h->group[h->home_bucket])) {
			hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK)
				+ sizeof(struct tdb_used_record);
			if (tinfo) {
				/* When we come back, use *next* bucket */
				tinfo->levels[tinfo->num_levels-1].entry
					+= h->home_bucket + 1;
			}
			group = use_bits(h, TDB_SUBLEVEL_HASH_BITS
					 - TDB_HASH_GROUP_BITS);
			h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS);
			if (tinfo) {
				tinfo->levels[tinfo->num_levels].hashtable
					= hashtable;
				tinfo->levels[tinfo->num_levels].total_buckets
					= 1 << TDB_SUBLEVEL_HASH_BITS;
				tinfo->levels[tinfo->num_levels].entry
					= group << TDB_HASH_GROUP_BITS;
				tinfo->num_levels++;
			}
			continue;
		}

		/* It's in this group: search (until 0 or all searched) */
		for (i = 0, h->found_bucket = h->home_bucket;
		     i < (1 << TDB_HASH_GROUP_BITS);
		     i++, h->found_bucket = ((h->found_bucket+1)
					     % (1 << TDB_HASH_GROUP_BITS))) {
			tdb_bool_err berr;
			if (is_subhash(h->group[h->found_bucket]))
				continue;

			if (!h->group[h->found_bucket])
				break;

			berr = match(tdb, h, &key, h->group[h->found_bucket],
				     rec);
			if (berr < 0) {
				ecode = berr;
				goto fail;
			}
			if (berr) {
				if (tinfo) {
					tinfo->levels[tinfo->num_levels-1].entry
						+= h->found_bucket;
				}
				return h->group[h->found_bucket] & TDB_OFF_MASK;
			}
		}
		/* Didn't find it: h indicates where it would go. */
		return 0;
	}

	/* All hash bits consumed: the entry, if present, lives in a chain.
	 * A failure in there must drop the hash lock too, otherwise the
	 * "no locks are held on error" contract is broken. */
	off = find_in_chain(tdb, key, hashtable, h, rec, tinfo);
	if (TDB_OFF_IS_ERR(off)) {
		ecode = off;
		goto fail;
	}
	return off;

fail:
	tdb_unlock_hashes(tdb, h->hlock_start, h->hlock_range, ltype);
	return ecode;
}

/* I wrote a simple test, expanding a hash to 2GB, for the following
 * cases:
 * 1) Expanding all the buckets at once,
 * 2) Expanding the bucket we wanted to place the new entry into.
 * 3) Expanding the most-populated bucket,
 *
 * I measured the worst/average/best density during this process.
 * 1) 3%/16%/30%
 * 2) 4%/20%/38%
 * 3) 6%/22%/41%
 *
 * So we figure out the busiest bucket for the moment.
+ */ +static unsigned fullest_bucket(struct tdb_context *tdb, + const tdb_off_t *group, + unsigned new_bucket) +{ + unsigned counts[1 << TDB_HASH_GROUP_BITS] = { 0 }; + unsigned int i, best_bucket; + + /* Count the new entry. */ + counts[new_bucket]++; + best_bucket = new_bucket; + + for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) { + unsigned this_bucket; + + if (is_subhash(group[i])) + continue; + this_bucket = group[i] & TDB_OFF_HASH_GROUP_MASK; + if (++counts[this_bucket] > counts[best_bucket]) + best_bucket = this_bucket; + } + + return best_bucket; +} + +static bool put_into_group(tdb_off_t *group, + unsigned bucket, tdb_off_t encoded) +{ + unsigned int i; + + for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) { + unsigned b = (bucket + i) % (1 << TDB_HASH_GROUP_BITS); + + if (group[b] == 0) { + group[b] = encoded; + return true; + } + } + return false; +} + +static void force_into_group(tdb_off_t *group, + unsigned bucket, tdb_off_t encoded) +{ + if (!put_into_group(group, bucket, encoded)) + abort(); +} + +static tdb_off_t encode_offset(tdb_off_t new_off, struct hash_info *h) +{ + return h->home_bucket + | new_off + | ((uint64_t)bits_from(h->h, + 64 - h->hash_used - TDB_OFF_UPPER_STEAL_EXTRA, + TDB_OFF_UPPER_STEAL_EXTRA) + << TDB_OFF_HASH_EXTRA_BIT); +} + +/* Simply overwrite the hash entry we found before. */ +enum TDB_ERROR replace_in_hash(struct tdb_context *tdb, + struct hash_info *h, + tdb_off_t new_off) +{ + return tdb_write_off(tdb, hbucket_off(h->group_start, h->found_bucket), + encode_offset(new_off, h)); +} + +/* We slot in anywhere that's empty in the chain. 
*/ +static enum TDB_ERROR COLD add_to_chain(struct tdb_context *tdb, + tdb_off_t subhash, + tdb_off_t new_off) +{ + tdb_off_t entry; + enum TDB_ERROR ecode; + + entry = tdb_find_zero_off(tdb, subhash, 1<<TDB_HASH_GROUP_BITS); + if (TDB_OFF_IS_ERR(entry)) { + return entry; + } + + if (entry == 1 << TDB_HASH_GROUP_BITS) { + tdb_off_t next; + + next = tdb_read_off(tdb, subhash + + offsetof(struct tdb_chain, next)); + if (TDB_OFF_IS_ERR(next)) { + return next; + } + + if (!next) { + next = alloc(tdb, 0, sizeof(struct tdb_chain), 0, + TDB_CHAIN_MAGIC, false); + if (TDB_OFF_IS_ERR(next)) + return next; + ecode = zero_out(tdb, + next+sizeof(struct tdb_used_record), + sizeof(struct tdb_chain)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + ecode = tdb_write_off(tdb, subhash + + offsetof(struct tdb_chain, + next), + next); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + return add_to_chain(tdb, next, new_off); + } + + return tdb_write_off(tdb, subhash + entry * sizeof(tdb_off_t), + new_off); +} + +/* Add into a newly created subhash. 
*/ +static enum TDB_ERROR add_to_subhash(struct tdb_context *tdb, tdb_off_t subhash, + unsigned hash_used, tdb_off_t val) +{ + tdb_off_t off = (val & TDB_OFF_MASK), *group; + struct hash_info h; + unsigned int gnum; + + h.hash_used = hash_used; + + if (hash_used + TDB_SUBLEVEL_HASH_BITS > 64) + return add_to_chain(tdb, subhash, off); + + h.h = hash_record(tdb, off); + gnum = use_bits(&h, TDB_SUBLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS); + h.group_start = subhash + + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS); + h.home_bucket = use_bits(&h, TDB_HASH_GROUP_BITS); + + group = tdb_access_write(tdb, h.group_start, + sizeof(*group) << TDB_HASH_GROUP_BITS, true); + if (TDB_PTR_IS_ERR(group)) { + return TDB_PTR_ERR(group); + } + force_into_group(group, h.home_bucket, encode_offset(off, &h)); + return tdb_access_commit(tdb, group); +} + +static enum TDB_ERROR expand_group(struct tdb_context *tdb, struct hash_info *h) +{ + unsigned bucket, num_vals, i, magic; + size_t subsize; + tdb_off_t subhash; + tdb_off_t vals[1 << TDB_HASH_GROUP_BITS]; + enum TDB_ERROR ecode; + + /* Attach new empty subhash under fullest bucket. */ + bucket = fullest_bucket(tdb, h->group, h->home_bucket); + + if (h->hash_used == 64) { + tdb->stats.alloc_chain++; + subsize = sizeof(struct tdb_chain); + magic = TDB_CHAIN_MAGIC; + } else { + tdb->stats.alloc_subhash++; + subsize = (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS); + magic = TDB_HTABLE_MAGIC; + } + + subhash = alloc(tdb, 0, subsize, 0, magic, false); + if (TDB_OFF_IS_ERR(subhash)) { + return subhash; + } + + ecode = zero_out(tdb, subhash + sizeof(struct tdb_used_record), + subsize); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* Remove any which are destined for bucket or are in wrong place. 
*/ + num_vals = 0; + for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) { + unsigned home_bucket = h->group[i] & TDB_OFF_HASH_GROUP_MASK; + if (!h->group[i] || is_subhash(h->group[i])) + continue; + if (home_bucket == bucket || home_bucket != i) { + vals[num_vals++] = h->group[i]; + h->group[i] = 0; + } + } + /* FIXME: This assert is valid, but we do this during unit test :( */ + /* assert(num_vals); */ + + /* Overwrite expanded bucket with subhash pointer. */ + h->group[bucket] = subhash | (1ULL << TDB_OFF_UPPER_STEAL_SUBHASH_BIT); + + /* Point to actual contents of record. */ + subhash += sizeof(struct tdb_used_record); + + /* Put values back. */ + for (i = 0; i < num_vals; i++) { + unsigned this_bucket = vals[i] & TDB_OFF_HASH_GROUP_MASK; + + if (this_bucket == bucket) { + ecode = add_to_subhash(tdb, subhash, h->hash_used, + vals[i]); + if (ecode != TDB_SUCCESS) + return ecode; + } else { + /* There should be room to put this back. */ + force_into_group(h->group, this_bucket, vals[i]); + } + } + return TDB_SUCCESS; +} + +enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h) +{ + unsigned int i, num_movers = 0; + tdb_off_t movers[1 << TDB_HASH_GROUP_BITS]; + + h->group[h->found_bucket] = 0; + for (i = 1; i < (1 << TDB_HASH_GROUP_BITS); i++) { + unsigned this_bucket; + + this_bucket = (h->found_bucket+i) % (1 << TDB_HASH_GROUP_BITS); + /* Empty bucket? We're done. */ + if (!h->group[this_bucket]) + break; + + /* Ignore subhashes. */ + if (is_subhash(h->group[this_bucket])) + continue; + + /* If this one is not happy where it is, we'll move it. */ + if ((h->group[this_bucket] & TDB_OFF_HASH_GROUP_MASK) + != this_bucket) { + movers[num_movers++] = h->group[this_bucket]; + h->group[this_bucket] = 0; + } + } + + /* Put back the ones we erased. 
*/ + for (i = 0; i < num_movers; i++) { + force_into_group(h->group, movers[i] & TDB_OFF_HASH_GROUP_MASK, + movers[i]); + } + + /* Now we write back the hash group */ + return tdb_write_convert(tdb, h->group_start, + h->group, sizeof(h->group)); +} + +enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h, + tdb_off_t new_off) +{ + enum TDB_ERROR ecode; + + /* We hit an empty bucket during search? That's where it goes. */ + if (!h->group[h->found_bucket]) { + h->group[h->found_bucket] = encode_offset(new_off, h); + /* Write back the modified group. */ + return tdb_write_convert(tdb, h->group_start, + h->group, sizeof(h->group)); + } + + if (h->hash_used > 64) + return add_to_chain(tdb, h->group_start, new_off); + + /* We're full. Expand. */ + ecode = expand_group(tdb, h); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + if (is_subhash(h->group[h->home_bucket])) { + /* We were expanded! */ + tdb_off_t hashtable; + unsigned int gnum; + + /* Write back the modified group. */ + ecode = tdb_write_convert(tdb, h->group_start, h->group, + sizeof(h->group)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* Move hashinfo down a level. */ + hashtable = (h->group[h->home_bucket] & TDB_OFF_MASK) + + sizeof(struct tdb_used_record); + gnum = use_bits(h,TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS); + h->home_bucket = use_bits(h, TDB_HASH_GROUP_BITS); + h->group_start = hashtable + + gnum * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS); + ecode = tdb_read_convert(tdb, h->group_start, &h->group, + sizeof(h->group)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + + /* Expanding the group must have made room if it didn't choose this + * bucket. */ + if (put_into_group(h->group, h->home_bucket, encode_offset(new_off,h))){ + return tdb_write_convert(tdb, h->group_start, + h->group, sizeof(h->group)); + } + + /* This can happen if all hashes in group (and us) dropped into same + * group in subhash. 
*/ + return add_to_hash(tdb, h, new_off); +} + +/* Traverse support: returns offset of record, or 0 or -ve error. */ +static tdb_off_t iterate_hash(struct tdb_context *tdb, + struct traverse_info *tinfo) +{ + tdb_off_t off, val, i; + struct traverse_level *tlevel; + + tlevel = &tinfo->levels[tinfo->num_levels-1]; + +again: + for (i = tdb_find_nonzero_off(tdb, tlevel->hashtable, + tlevel->entry, tlevel->total_buckets); + i != tlevel->total_buckets; + i = tdb_find_nonzero_off(tdb, tlevel->hashtable, + i+1, tlevel->total_buckets)) { + if (TDB_OFF_IS_ERR(i)) { + return i; + } + + val = tdb_read_off(tdb, tlevel->hashtable+sizeof(tdb_off_t)*i); + if (TDB_OFF_IS_ERR(val)) { + return val; + } + + off = val & TDB_OFF_MASK; + + /* This makes the delete-all-in-traverse case work + * (and simplifies our logic a little). */ + if (off == tinfo->prev) + continue; + + tlevel->entry = i; + + if (!is_subhash(val)) { + /* Found one. */ + tinfo->prev = off; + return off; + } + + /* When we come back, we want the next one */ + tlevel->entry++; + tinfo->num_levels++; + tlevel++; + tlevel->hashtable = off + sizeof(struct tdb_used_record); + tlevel->entry = 0; + /* Next level is a chain? */ + if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) + tlevel->total_buckets = (1 << TDB_HASH_GROUP_BITS); + else + tlevel->total_buckets = (1 << TDB_SUBLEVEL_HASH_BITS); + goto again; + } + + /* Nothing there? */ + if (tinfo->num_levels == 1) + return 0; + + /* Handle chained entries. */ + if (unlikely(tinfo->num_levels == TDB_MAX_LEVELS + 1)) { + tlevel->hashtable = tdb_read_off(tdb, tlevel->hashtable + + offsetof(struct tdb_chain, + next)); + if (TDB_OFF_IS_ERR(tlevel->hashtable)) { + return tlevel->hashtable; + } + if (tlevel->hashtable) { + tlevel->hashtable += sizeof(struct tdb_used_record); + tlevel->entry = 0; + goto again; + } + } + + /* Go back up and keep searching. */ + tinfo->num_levels--; + tlevel--; + goto again; +} + +/* Return success if we find something, TDB_ERR_NOEXIST if none. 
*/ +enum TDB_ERROR next_in_hash(struct tdb_context *tdb, + struct traverse_info *tinfo, + TDB_DATA *kbuf, size_t *dlen) +{ + const unsigned group_bits = TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS; + tdb_off_t hl_start, hl_range, off; + enum TDB_ERROR ecode; + + while (tinfo->toplevel_group < (1 << group_bits)) { + hl_start = (tdb_off_t)tinfo->toplevel_group + << (64 - group_bits); + hl_range = 1ULL << group_bits; + ecode = tdb_lock_hashes(tdb, hl_start, hl_range, F_RDLCK, + TDB_LOCK_WAIT); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + off = iterate_hash(tdb, tinfo); + if (off) { + struct tdb_used_record rec; + + if (TDB_OFF_IS_ERR(off)) { + ecode = off; + goto fail; + } + + ecode = tdb_read_convert(tdb, off, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + goto fail; + } + if (rec_magic(&rec) != TDB_USED_MAGIC) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, + TDB_LOG_ERROR, + "next_in_hash:" + " corrupt record at %llu", + (long long)off); + goto fail; + } + + kbuf->dsize = rec_key_length(&rec); + + /* They want data as well? 
*/
+			if (dlen) {
+				*dlen = rec_data_length(&rec);
+				kbuf->dptr = tdb_alloc_read(tdb,
+							    off + sizeof(rec),
+							    kbuf->dsize
+							    + *dlen);
+			} else {
+				kbuf->dptr = tdb_alloc_read(tdb,
+							    off + sizeof(rec),
+							    kbuf->dsize);
+			}
+			/* Drop the group lock before reporting the result. */
+			tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
+			if (TDB_PTR_IS_ERR(kbuf->dptr)) {
+				return TDB_PTR_ERR(kbuf->dptr);
+			}
+			return TDB_SUCCESS;
+		}
+
+		tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
+
+		/* This group is exhausted: advance to the next one. */
+		tinfo->toplevel_group++;
+		tinfo->levels[0].hashtable
+			+= (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS);
+		tinfo->levels[0].entry = 0;
+	}
+	return TDB_ERR_NOEXIST;
+
+fail:
+	tdb_unlock_hashes(tdb, hl_start, hl_range, F_RDLCK);
+	return ecode;
+
+}
+/* Reset tinfo to the first group of the header's hash table, then return
+ * the first record (if any) via next_in_hash(). */
+enum TDB_ERROR first_in_hash(struct tdb_context *tdb,
+			     struct traverse_info *tinfo,
+			     TDB_DATA *kbuf, size_t *dlen)
+{
+	tinfo->prev = 0;
+	tinfo->toplevel_group = 0;
+	tinfo->num_levels = 1;
+	tinfo->levels[0].hashtable = offsetof(struct tdb_header, hashtable);
+	tinfo->levels[0].entry = 0;
+	tinfo->levels[0].total_buckets = (1 << TDB_HASH_GROUP_BITS);
+
+	return next_in_hash(tdb, tinfo, kbuf, dlen);
+}
+
+/* Even if the entry isn't in this hash bucket, you'd have to lock this
+ * bucket to find it.
+ *
+ * Hashes the key, takes the top bits as the group number and locks the
+ * range hlock_range() reports for that group.  func is only used as the
+ * name passed to tdb_trace_1rec(). */
+static enum TDB_ERROR chainlock(struct tdb_context *tdb, const TDB_DATA *key,
+				int ltype, enum tdb_lock_flags waitflag,
+				const char *func)
+{
+	enum TDB_ERROR ecode;
+	uint64_t h = tdb_hash(tdb, key->dptr, key->dsize);
+	tdb_off_t lockstart, locksize;
+	unsigned int group, gbits;
+
+	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
+	group = bits_from(h, 64 - gbits, gbits);
+
+	lockstart = hlock_range(group, &locksize);
+
+	ecode = tdb_lock_hashes(tdb, lockstart, locksize, ltype, waitflag);
+	tdb_trace_1rec(tdb, func, *key);
+	return ecode;
+}
+
+/* lock/unlock one hash chain. 
This is meant to be used to reduce
+   contention - it cannot guarantee how many records will be locked.
+   Takes a blocking write lock; the result is also stored in
+   tdb->last_error. */
+enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	return tdb->last_error = chainlock(tdb, &key, F_WRLCK, TDB_LOCK_WAIT,
+					   "tdb_chainlock");
+}
+/* Release the write lock on the hash range that key maps to; the range is
+ * recomputed from the key with the same steps chainlock() uses. */
+void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key)
+{
+	uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+	tdb_off_t lockstart, locksize;
+	unsigned int group, gbits;
+
+	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
+	group = bits_from(h, 64 - gbits, gbits);
+
+	lockstart = hlock_range(group, &locksize);
+
+	tdb_trace_1rec(tdb, "tdb_chainunlock", key);
+	tdb_unlock_hashes(tdb, lockstart, locksize, F_WRLCK);
+}
+/* Read-lock variant of tdb_chainlock(): blocking F_RDLCK on the key's
+ * hash range, result stored in tdb->last_error. */
+enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key)
+{
+	return tdb->last_error = chainlock(tdb, &key, F_RDLCK, TDB_LOCK_WAIT,
+					   "tdb_chainlock_read");
+}
+/* Release the read lock on the hash range that key maps to. */
+void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key)
+{
+	uint64_t h = tdb_hash(tdb, key.dptr, key.dsize);
+	tdb_off_t lockstart, locksize;
+	unsigned int group, gbits;
+
+	gbits = TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS;
+	group = bits_from(h, 64 - gbits, gbits);
+
+	lockstart = hlock_range(group, &locksize);
+
+	tdb_trace_1rec(tdb, "tdb_chainunlock_read", key);
+	tdb_unlock_hashes(tdb, lockstart, locksize, F_RDLCK);
+}
diff --git a/lib/tdb2/io.c b/lib/tdb2/io.c
new file mode 100644
index 0000000000..8c5f45f308
--- /dev/null
+++ b/lib/tdb2/io.c
@@ -0,0 +1,615 @@
+ /*
+   Unix SMB/CIFS implementation.
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell 1999-2005
+   Copyright (C) Paul `Rusty' Russell 2000
+   Copyright (C) Jeremy Allison 2000-2003
+   Copyright (C) Rusty Russell 2010
+
+     ** NOTE! The following LGPL license applies to the tdb
+     ** library. 
This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+#include "private.h"
+#include <assert.h>
+#include <ccan/likely/likely.h>
+/* Drop the file's memory mapping, if there is one.  Does nothing when the
+ * file has no descriptor (fd == -1). */
+void tdb_munmap(struct tdb_file *file)
+{
+	if (file->fd == -1)
+		return;
+
+	if (file->map_ptr) {
+		munmap(file->map_ptr, file->map_size);
+		file->map_ptr = NULL;
+	}
+}
+/* Try to map the first map_size bytes of the file MAP_SHARED.  Does nothing
+ * for TDB_INTERNAL or TDB_NOMMAP databases. */
+void tdb_mmap(struct tdb_context *tdb)
+{
+	if (tdb->flags & TDB_INTERNAL)
+		return;
+
+	if (tdb->flags & TDB_NOMMAP)
+		return;
+
+	/* size_t can be smaller than off_t. */
+	if ((size_t)tdb->file->map_size == tdb->file->map_size) {
+		tdb->file->map_ptr = mmap(NULL, tdb->file->map_size,
+					  tdb->mmap_flags,
+					  MAP_SHARED, tdb->file->fd, 0);
+	} else
+		tdb->file->map_ptr = MAP_FAILED;	/* too big to map */
+
+	/*
+	 * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!! 
+	 */
+	if (tdb->file->map_ptr == MAP_FAILED) {
+		tdb->file->map_ptr = NULL;
+		tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING,
+			   "tdb_mmap failed for size %lld (%s)",
+			   (long long)tdb->file->map_size, strerror(errno));
+	}
+}
+
+/* check for an out of bounds access - if it is out of bounds then
+   see if the database has been expanded by someone else and expand
+   if necessary
+   note that "len" is the minimum length needed for the db
+
+   Returns TDB_SUCCESS when len bytes are available (remapping the file
+   if it has grown), TDB_ERR_IO when they are not, or the error from
+   taking the expansion lock.  With probe set, the "beyond end" cases
+   are not logged.
+*/
+static enum TDB_ERROR tdb_oob(struct tdb_context *tdb, tdb_off_t len,
+			      bool probe)
+{
+	struct stat st;
+	enum TDB_ERROR ecode;
+
+	/* We can't hold pointers during this: we could unmap! */
+	assert(!tdb->direct_access
+	       || (tdb->flags & TDB_NOLOCK)
+	       || tdb_has_expansion_lock(tdb));
+
+	if (len <= tdb->file->map_size)
+		return TDB_SUCCESS;
+	if (tdb->flags & TDB_INTERNAL) {
+		if (!probe) {
+			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+				 "tdb_oob len %lld beyond internal"
+				 " malloc size %lld",
+				 (long long)len,
+				 (long long)tdb->file->map_size);
+		}
+		return TDB_ERR_IO;
+	}
+
+	/* Hold the expansion lock so the size is not read mid-expansion. */
+	ecode = tdb_lock_expand(tdb, F_RDLCK);
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (fstat(tdb->file->fd, &st) != 0) {
+		tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+			   "Failed to fstat file: %s", strerror(errno));
+		tdb_unlock_expand(tdb, F_RDLCK);
+		return TDB_ERR_IO;
+	}
+
+	tdb_unlock_expand(tdb, F_RDLCK);
+
+	/* Compare at full tdb_off_t width: size_t can be smaller than
+	 * off_t, and truncating len would let an out-of-range request
+	 * look as if it were inside the file. */
+	if ((tdb_off_t)st.st_size < len) {
+		if (!probe) {
+			tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+				   "tdb_oob len %llu beyond eof at %llu",
+				   (unsigned long long)len,
+				   (unsigned long long)st.st_size);
+		}
+		return TDB_ERR_IO;
+	}
+
+	/* Unmap, update size, remap */
+	tdb_munmap(tdb->file);
+
+	tdb->file->map_size = st.st_size;
+	tdb_mmap(tdb);
+	return TDB_SUCCESS;
+}
+
+/* Endian conversion: we only ever deal with 8 byte quantities.
+ * Byte-swaps size bytes of buf in place when TDB_CONVERT is set and buf
+ * is non-NULL; always returns buf. */
+void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size)
+{
+	assert(size % 8 == 0);
+	if (unlikely((tdb->flags & TDB_CONVERT)) && buf) {
+		uint64_t i, *p = (uint64_t *)buf;
+		for (i = 0; i < size / 8; i++)
+			p[i] = bswap_64(p[i]);
+	}
+	
return buf;
+}
+
+/* Return first non-zero offset in offset array, or end, or -ve error.
+ * base is the file offset of the array; entries [start, end) are scanned
+ * and the result is an index into the array, not a file offset. */
+/* FIXME: Return the off? */
+uint64_t tdb_find_nonzero_off(struct tdb_context *tdb,
+			      tdb_off_t base, uint64_t start, uint64_t end)
+{
+	uint64_t i;
+	const uint64_t *val;
+
+	/* Zero vs non-zero is the same unconverted: minor optimization. */
+	val = tdb_access_read(tdb, base + start * sizeof(tdb_off_t),
+			      (end - start) * sizeof(tdb_off_t), false);
+	if (TDB_PTR_IS_ERR(val)) {
+		return TDB_PTR_ERR(val);
+	}
+
+	for (i = 0; i < (end - start); i++) {
+		if (val[i])
+			break;
+	}
+	tdb_access_release(tdb, val);
+	return start + i;	/* equals end when nothing was found */
+}
+
+/* Return first zero offset in num offset array, or num, or -ve error. */
+uint64_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off,
+			   uint64_t num)
+{
+	uint64_t i;
+	const uint64_t *val;
+
+	/* Zero vs non-zero is the same unconverted: minor optimization. */
+	val = tdb_access_read(tdb, off, num * sizeof(tdb_off_t), false);
+	if (TDB_PTR_IS_ERR(val)) {
+		return TDB_PTR_ERR(val);
+	}
+
+	for (i = 0; i < num; i++) {
+		if (!val[i])
+			break;
+	}
+	tdb_access_release(tdb, val);
+	return i;
+}
+/* Zero len bytes at off: memset through a direct pointer when the methods
+ * provide one, otherwise write zeroed chunks with methods->twrite. */
+enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len)
+{
+	char buf[8192] = { 0 };
+	void *p = tdb->methods->direct(tdb, off, len, true);
+	enum TDB_ERROR ecode = TDB_SUCCESS;
+
+	assert(!tdb->read_only);
+	if (TDB_PTR_IS_ERR(p)) {
+		return TDB_PTR_ERR(p);
+	}
+	if (p) {
+		memset(p, 0, len);
+		return ecode;
+	}
+	while (len) {
+		unsigned todo = len < sizeof(buf) ? 
len : sizeof(buf);
+		ecode = tdb->methods->twrite(tdb, off, buf, todo);
+		if (ecode != TDB_SUCCESS) {
+			break;
+		}
+		len -= todo;
+		off += todo;
+	}
+	return ecode;
+}
+/* Read one tdb_off_t stored at off.  On failure the error code itself is
+ * returned in the tdb_off_t; callers test it with TDB_OFF_IS_ERR(). */
+tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off)
+{
+	tdb_off_t ret;
+	enum TDB_ERROR ecode;
+
+	/* Without byte-swapping we can read straight through a direct
+	 * pointer, if the methods can supply one. */
+	if (likely(!(tdb->flags & TDB_CONVERT))) {
+		tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
+						    false);
+		if (TDB_PTR_IS_ERR(p)) {
+			return TDB_PTR_ERR(p);
+		}
+		if (p)
+			return *p;
+	}
+
+	ecode = tdb_read_convert(tdb, off, &ret, sizeof(ret));
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+	return ret;
+}
+
+/* write a lump of data at a specified offset
+ *
+ * Fails with TDB_ERR_RDONLY on a read-only database.  Copies into the
+ * mapping when there is one, otherwise uses pwrite(). */
+static enum TDB_ERROR tdb_write(struct tdb_context *tdb, tdb_off_t off,
+				const void *buf, tdb_len_t len)
+{
+	enum TDB_ERROR ecode;
+
+	if (tdb->read_only) {
+		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+				  "Write to read-only database");
+	}
+
+	ecode = tdb->methods->oob(tdb, off + len, 0);
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (tdb->file->map_ptr) {
+		memcpy(off + (char *)tdb->file->map_ptr, buf, len);
+	} else {
+		ssize_t ret;
+		ret = pwrite(tdb->file->fd, buf, len, off);
+		if (ret != len) {
+			/* This shouldn't happen: we avoid sparse files. 
*/
+			if (ret >= 0)
+				errno = ENOSPC;
+
+			return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+					  "tdb_write: %zi at %zu len=%zu (%s)",
+					  ret, (size_t)off, (size_t)len,
+					  strerror(errno));
+		}
+	}
+	return TDB_SUCCESS;
+}
+
+/* read a lump of data at a specified offset
+ *
+ * Copies from the mapping when there is one, otherwise uses pread().
+ * A short or failed pread() is reported as TDB_ERR_IO. */
+static enum TDB_ERROR tdb_read(struct tdb_context *tdb, tdb_off_t off,
+			       void *buf, tdb_len_t len)
+{
+	enum TDB_ERROR ecode;
+
+	ecode = tdb->methods->oob(tdb, off + len, 0);
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+
+	if (tdb->file->map_ptr) {
+		memcpy(buf, off + (char *)tdb->file->map_ptr, len);
+	} else {
+		ssize_t r = pread(tdb->file->fd, buf, len, off);
+		if (r != len) {
+			/* A short read leaves errno untouched; give the log
+			 * message something meaningful, as tdb_write does. */
+			if (r >= 0)
+				errno = EIO;
+
+			return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+					  "tdb_read failed with %zi at %zu "
+					  "len=%zu (%s) map_size=%zu",
+					  r, (size_t)off, (size_t)len,
+					  strerror(errno),
+					  (size_t)tdb->file->map_size);
+		}
+	}
+	return TDB_SUCCESS;
+}
+
+/* Write len bytes of rec at off, byte-swapped when TDB_CONVERT is set.
+ * rec itself is never modified: conversion is done on a temporary copy.
+ * Returns TDB_ERR_OOM if that copy cannot be allocated, otherwise the
+ * result of methods->twrite. */
+enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off,
+				 const void *rec, size_t len)
+{
+	enum TDB_ERROR ecode;
+
+	if (unlikely((tdb->flags & TDB_CONVERT))) {
+		void *conv = malloc(len);
+		if (!conv) {
+			return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+					  "tdb_write: no memory converting"
+					  " %zu bytes", len);
+		}
+		memcpy(conv, rec, len);
+		ecode = tdb->methods->twrite(tdb, off,
+					   tdb_convert(tdb, conv, len), len);
+		free(conv);
+	} else {
+		ecode = tdb->methods->twrite(tdb, off, rec, len);
+	}
+	return ecode;
+}
+
+/* Read len bytes at off into rec and byte-swap them in place when
+ * TDB_CONVERT is set.  Returns the result of methods->tread. */
+enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off,
+				void *rec, size_t len)
+{
+	enum TDB_ERROR ecode = tdb->methods->tread(tdb, off, rec, len);
+
+	/* Only convert what was actually read: after a failed read the
+	 * buffer contents are indeterminate and must not be touched. */
+	if (likely(ecode == TDB_SUCCESS))
+		tdb_convert(tdb, rec, len);
+	return ecode;
+}
+
+/* Store val at off.  Fails with TDB_ERR_RDONLY on a read-only database. */
+enum TDB_ERROR tdb_write_off(struct tdb_context *tdb,
+			     tdb_off_t off, tdb_off_t val)
+{
+	if (tdb->read_only) {
+		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+				  "Write to read-only database");
+	}
+
+	if (likely(!(tdb->flags & TDB_CONVERT))) {
+		tdb_off_t *p = tdb->methods->direct(tdb, off, sizeof(*p),
+						    true);
+		if 
(TDB_PTR_IS_ERR(p)) {
+			return TDB_PTR_ERR(p);
+		}
+		if (p) {
+			*p = val;
+			return TDB_SUCCESS;
+		}
+	}
+	return tdb_write_convert(tdb, off, &val, sizeof(val));
+}
+/* Allocate prefix + len bytes and read len bytes from offset into the part
+ * after the prefix.  Returns the malloc'ed buffer (caller frees), or an
+ * error pointer built with TDB_ERR_PTR(). */
+static void *_tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset,
+			     tdb_len_t len, unsigned int prefix)
+{
+	unsigned char *buf;
+	enum TDB_ERROR ecode;
+
+	/* some systems don't like zero length malloc */
+	buf = malloc(prefix + len ? prefix + len : 1);
+	if (!buf) {
+		tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_USE_ERROR,
+			   "tdb_alloc_read malloc failed len=%zu",
+			   (size_t)(prefix + len));
+		return TDB_ERR_PTR(TDB_ERR_OOM);
+	} else {
+		ecode = tdb->methods->tread(tdb, offset, buf+prefix, len);
+		if (unlikely(ecode != TDB_SUCCESS)) {
+			free(buf);
+			return TDB_ERR_PTR(ecode);
+		}
+	}
+	return buf;
+}
+
+/* read a lump of data, allocating the space for it */
+void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
+{
+	return _tdb_alloc_read(tdb, offset, len, 0);
+}
+/* Write len bytes starting at off by repeating the size-byte pattern in
+ * buf with pwrite().  A short write is reported as ENOSPC / TDB_ERR_IO. */
+static enum TDB_ERROR fill(struct tdb_context *tdb,
+			   const void *buf, size_t size,
+			   tdb_off_t off, tdb_len_t len)
+{
+	while (len) {
+		size_t n = len > size ? size : len;
+		ssize_t ret = pwrite(tdb->file->fd, buf, n, off);
+		if (ret != n) {
+			if (ret >= 0)
+				errno = ENOSPC;
+
+			return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+					  "fill failed:"
+					  " %zi at %zu len=%zu (%s)",
+					  ret, (size_t)off, (size_t)len,
+					  strerror(errno));
+		}
+		len -= n;
+		off += n;
+	}
+	return TDB_SUCCESS;
+}
+
+/* expand a file. 
we prefer to use ftruncate, as that is what posix
+   says to use for mmap expansion.
+   Grows the database by "addition" bytes: realloc() for TDB_INTERNAL
+   databases, otherwise ftruncate() plus an explicit fill of the new
+   region. */
+static enum TDB_ERROR tdb_expand_file(struct tdb_context *tdb,
+				      tdb_len_t addition)
+{
+	char buf[8192];
+	enum TDB_ERROR ecode;
+
+	if (tdb->read_only) {
+		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+				  "Expand on read-only database");
+	}
+
+	if (tdb->flags & TDB_INTERNAL) {
+		char *new = realloc(tdb->file->map_ptr,
+				    tdb->file->map_size + addition);
+		if (!new) {
+			return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+					  "No memory to expand database");
+		}
+		tdb->file->map_ptr = new;
+		tdb->file->map_size += addition;
+	} else {
+		/* Unmap before trying to write; old TDB claimed OpenBSD had
+		 * problem with this otherwise. */
+		tdb_munmap(tdb->file);
+
+		/* If this fails, we try to fill anyway. */
+		if (ftruncate(tdb->file->fd, tdb->file->map_size + addition))
+			;
+
+		/* now fill the file with something. This ensures that the
+		   file isn't sparse, which would be very bad if we ran out of
+		   disk. 
This must be done with write, not via mmap */
+		memset(buf, 0x43, sizeof(buf));
+		ecode = fill(tdb, buf, sizeof(buf), tdb->file->map_size,
+			     addition);
+		if (ecode != TDB_SUCCESS)
+			return ecode;
+		tdb->file->map_size += addition;
+		tdb_mmap(tdb);
+	}
+	return TDB_SUCCESS;
+}
+/* Get read access to len bytes at off.  Returns a direct pointer when the
+ * methods can supply one (counted in tdb->direct_access), otherwise a
+ * malloc'ed copy linked onto tdb->access, byte-swapped if convert is set.
+ * Returns an error pointer on failure; release with tdb_access_release(). */
+const void *tdb_access_read(struct tdb_context *tdb,
+			    tdb_off_t off, tdb_len_t len, bool convert)
+{
+	void *ret = NULL;
+
+	if (likely(!(tdb->flags & TDB_CONVERT))) {
+		ret = tdb->methods->direct(tdb, off, len, false);
+
+		if (TDB_PTR_IS_ERR(ret)) {
+			return ret;
+		}
+	}
+	if (!ret) {
+		struct tdb_access_hdr *hdr;
+		hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
+		if (TDB_PTR_IS_ERR(hdr)) {
+			return hdr;
+		}
+		hdr->next = tdb->access;
+		tdb->access = hdr;
+		ret = hdr + 1;	/* payload follows the header */
+		if (convert) {
+			tdb_convert(tdb, (void *)ret, len);
+		}
+	} else
+		tdb->direct_access++;
+
+	return ret;
+}
+/* Get write access to len bytes at off.  Like tdb_access_read(), but a
+ * copied buffer also records off, len and convert in its header so that
+ * tdb_access_commit() can write it back.  Fails with TDB_ERR_RDONLY on a
+ * read-only database. */
+void *tdb_access_write(struct tdb_context *tdb,
+		       tdb_off_t off, tdb_len_t len, bool convert)
+{
+	void *ret = NULL;
+
+	if (tdb->read_only) {
+		tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+			   "Write to read-only database");
+		return TDB_ERR_PTR(TDB_ERR_RDONLY);
+	}
+
+	if (likely(!(tdb->flags & TDB_CONVERT))) {
+		ret = tdb->methods->direct(tdb, off, len, true);
+
+		if (TDB_PTR_IS_ERR(ret)) {
+			return ret;
+		}
+	}
+
+	if (!ret) {
+		struct tdb_access_hdr *hdr;
+		hdr = _tdb_alloc_read(tdb, off, len, sizeof(*hdr));
+		if (TDB_PTR_IS_ERR(hdr)) {
+			return hdr;
+		}
+		hdr->next = tdb->access;
+		tdb->access = hdr;
+		hdr->off = off;
+		hdr->len = len;
+		hdr->convert = convert;
+		ret = hdr + 1;
+		if (convert)
+			tdb_convert(tdb, (void *)ret, len);
+	} else
+		tdb->direct_access++;
+
+	return ret;
+}
+/* Find the link in the tdb->access list that points at the header whose
+ * payload is p; NULL if p is not a copied buffer. */
+static struct tdb_access_hdr **find_hdr(struct tdb_context *tdb, const void *p)
+{
+	struct tdb_access_hdr **hp;
+
+	for (hp = &tdb->access; *hp; hp = &(*hp)->next) {
+		if (*hp + 1 == p)
+			return hp;
+	}
+	return NULL;
+}
+/* Finish with a pointer from tdb_access_read() without writing back. */
+void tdb_access_release(struct tdb_context *tdb, const void *p)
+{
+	struct tdb_access_hdr 
*hdr, **hp = find_hdr(tdb, p); + + if (hp) { + hdr = *hp; + *hp = hdr->next; + free(hdr); + } else + tdb->direct_access--; +} + +enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p) +{ + struct tdb_access_hdr *hdr, **hp = find_hdr(tdb, p); + enum TDB_ERROR ecode; + + if (hp) { + hdr = *hp; + if (hdr->convert) + ecode = tdb_write_convert(tdb, hdr->off, p, hdr->len); + else + ecode = tdb_write(tdb, hdr->off, p, hdr->len); + *hp = hdr->next; + free(hdr); + } else { + tdb->direct_access--; + ecode = TDB_SUCCESS; + } + + return ecode; +} + +static void *tdb_direct(struct tdb_context *tdb, tdb_off_t off, size_t len, + bool write_mode) +{ + enum TDB_ERROR ecode; + + if (unlikely(!tdb->file->map_ptr)) + return NULL; + + ecode = tdb_oob(tdb, off + len, true); + if (unlikely(ecode != TDB_SUCCESS)) + return TDB_ERR_PTR(ecode); + return (char *)tdb->file->map_ptr + off; +} + +void tdb_inc_seqnum(struct tdb_context *tdb) +{ + tdb_off_t seq; + + if (likely(!(tdb->flags & TDB_CONVERT))) { + int64_t *direct; + + direct = tdb->methods->direct(tdb, + offsetof(struct tdb_header, + seqnum), + sizeof(*direct), true); + if (likely(direct)) { + /* Don't let it go negative, even briefly */ + if (unlikely((*direct) + 1) < 0) + *direct = 0; + (*direct)++; + return; + } + } + + seq = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum)); + if (!TDB_OFF_IS_ERR(seq)) { + seq++; + if (unlikely((int64_t)seq < 0)) + seq = 0; + tdb_write_off(tdb, offsetof(struct tdb_header, seqnum), seq); + } +} + +static const struct tdb_methods io_methods = { + tdb_read, + tdb_write, + tdb_oob, + tdb_expand_file, + tdb_direct, +}; + +/* + initialise the default methods table +*/ +void tdb_io_init(struct tdb_context *tdb) +{ + tdb->methods = &io_methods; +} diff --git a/lib/tdb2/lock.c b/lib/tdb2/lock.c new file mode 100644 index 0000000000..76b8bc3157 --- /dev/null +++ b/lib/tdb2/lock.c @@ -0,0 +1,875 @@ + /* + Unix SMB/CIFS implementation. 
+
+   trivial database library
+
+   Copyright (C) Andrew Tridgell 1999-2005
+   Copyright (C) Paul `Rusty' Russell 2000
+   Copyright (C) Jeremy Allison 2000-2003
+
+     ** NOTE! The following LGPL license applies to the tdb
+     ** library. This does NOT imply that all of Samba is released
+     ** under the LGPL
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 3 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "private.h"
+#include <assert.h>
+#include <ccan/build_assert/build_assert.h>
+
+/* If we were threaded, we could wait for unlock, but we're not, so fail.
+ * Logs the conflict on behalf of "call" and returns what tdb_logerr()
+ * returns for TDB_ERR_LOCK. */
+static enum TDB_ERROR owner_conflict(struct tdb_context *tdb, const char *call)
+{
+	return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
+			  "%s: lock owned by another tdb in this process.",
+			  call);
+}
+
+/* If we fork, we no longer really own locks.
+ * Returns true when no locks are held or the recorded locker is still
+ * this process; otherwise returns false, logging first if log is set. */
+static bool check_lock_pid(struct tdb_context *tdb,
+			   const char *call, bool log)
+{
+	/* No locks? No problem! */
+	if (tdb->file->allrecord_lock.count == 0
+	    && tdb->file->num_lockrecs == 0) {
+		return true;
+	}
+
+	/* No fork? No problem! */
+	if (tdb->file->locker == getpid()) {
+		return true;
+	}
+
+	if (log) {
+		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
+			   "%s: fork() detected after lock acquisition!" 
+			   " (%u vs %u)", call, tdb->file->locker, getpid());
+	}
+	return false;
+}
+/* fcntl()-based byte-range lock: rw is the lock type, waitflag selects
+ * F_SETLKW over F_SETLK.  Retries while interrupted (EINTR) and returns
+ * the fcntl() result.  The last argument is ignored. */
+int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag,
+		   void *unused)
+{
+	struct flock fl;
+	int ret;
+
+	do {
+		fl.l_type = rw;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = off;
+		fl.l_len = len;
+
+		if (waitflag)
+			ret = fcntl(fd, F_SETLKW, &fl);
+		else
+			ret = fcntl(fd, F_SETLK, &fl);
+	} while (ret != 0 && errno == EINTR);
+	return ret;
+}
+/* fcntl()-based unlock of a byte range; rw and the last argument are
+ * ignored.  Retries on EINTR and returns the fcntl() result. */
+int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *unused)
+{
+	struct flock fl;
+	int ret;
+
+	do {
+		fl.l_type = F_UNLCK;
+		fl.l_whence = SEEK_SET;
+		fl.l_start = off;
+		fl.l_len = len;
+
+		ret = fcntl(fd, F_SETLKW, &fl);
+	} while (ret != 0 && errno == EINTR);
+	return ret;
+}
+/* Take a low-level lock through tdb->lock_fn, updating the lock
+ * statistics.  When no locks are currently held, the calling pid is
+ * recorded as the locker first. */
+static int lock(struct tdb_context *tdb,
+		      int rw, off_t off, off_t len, bool waitflag)
+{
+	int ret;
+	if (tdb->file->allrecord_lock.count == 0
+	    && tdb->file->num_lockrecs == 0) {
+		tdb->file->locker = getpid();
+	}
+
+	tdb->stats.lock_lowlevel++;
+	ret = tdb->lock_fn(tdb->file->fd, rw, off, len, waitflag,
+			   tdb->lock_data);
+	if (!waitflag) {
+		tdb->stats.lock_nonblock++;
+		if (ret != 0)
+			tdb->stats.lock_nonblock_fail++;
+	}
+	return ret;
+}
+/* Release a low-level lock through tdb->unlock_fn.  The "#if 0" section
+ * is disabled debugging code that cross-checks against /proc/locks. */
+static int unlock(struct tdb_context *tdb, int rw, off_t off, off_t len)
+{
+#if 0 /* Check they matched up locks and unlocks correctly. */
+	char line[80];
+	FILE *locks;
+	bool found = false;
+
+	locks = fopen("/proc/locks", "r");
+
+	while (fgets(line, 80, locks)) {
+		char *p;
+		int type, start, l;
+
+		/* eg. 
1: FLOCK ADVISORY WRITE 2440 08:01:2180826 0 EOF */
+		p = strchr(line, ':') + 1;
+		if (strncmp(p, " POSIX ADVISORY ", strlen(" POSIX ADVISORY ")))
+			continue;
+		p += strlen(" FLOCK ADVISORY ");
+		if (strncmp(p, "READ ", strlen("READ ")) == 0)
+			type = F_RDLCK;
+		else if (strncmp(p, "WRITE ", strlen("WRITE ")) == 0)
+			type = F_WRLCK;
+		else
+			abort();
+		p += 6;
+		if (atoi(p) != getpid())
+			continue;
+		p = strchr(strchr(p, ' ') + 1, ' ') + 1;
+		start = atoi(p);
+		p = strchr(p, ' ') + 1;
+		if (strncmp(p, "EOF", 3) == 0)
+			l = 0;
+		else
+			l = atoi(p) - start + 1;
+
+		if (off == start) {
+			if (len != l) {
+				fprintf(stderr, "Len %u should be %u: %s",
+					(int)len, l, line);
+				abort();
+			}
+			if (type != rw) {
+				fprintf(stderr, "Type %s wrong: %s",
+					rw == F_RDLCK ? "READ" : "WRITE", line);
+				abort();
+			}
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		fprintf(stderr, "Unlock on %u@%u not found!",
+			(int)off, (int)len);
+		abort();
+	}
+
+	fclose(locks);
+#endif
+
+	return tdb->unlock_fn(tdb->file->fd, rw, off, len, tdb->lock_data);
+}
+
+/* a byte range locking function - return 0 on success
+   this functions locks len bytes at the specified offset.
+
+   note that a len of zero means lock to end of file
+
+   A no-op returning TDB_SUCCESS under TDB_NOLOCK.  Write locks on a
+   read-only database fail with TDB_ERR_RDONLY.  With TDB_LOCK_PROBE in
+   flags, a failed lock is not logged.
+*/
+static enum TDB_ERROR tdb_brlock(struct tdb_context *tdb,
+				 int rw_type, tdb_off_t offset, tdb_off_t len,
+				 enum tdb_lock_flags flags)
+{
+	int ret;
+
+	if (tdb->flags & TDB_NOLOCK) {
+		return TDB_SUCCESS;
+	}
+
+	if (rw_type == F_WRLCK && tdb->read_only) {
+		return tdb_logerr(tdb, TDB_ERR_RDONLY, TDB_LOG_USE_ERROR,
+				  "Write lock attempted on read-only database");
+	}
+
+	/* A 32 bit system cannot open a 64-bit file, but it could have
+	 * expanded since then: check here. */
+	if ((size_t)(offset + len) != offset + len) {
+		return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR,
+				  "tdb_brlock: lock on giant offset %llu",
+				  (long long)(offset + len));
+	}
+
+	ret = lock(tdb, rw_type, offset, len, flags & TDB_LOCK_WAIT);
+	if (ret != 0) {
+		/* Generic lock error. 
errno set by fcntl.
+		 * EAGAIN is an expected return from non-blocking
+		 * locks. */
+		if (!(flags & TDB_LOCK_PROBE)
+		    && (errno != EAGAIN && errno != EINTR)) {
+			tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				   "tdb_brlock failed (fd=%d) at"
+				   " offset %zu rw_type=%d flags=%d len=%zu:"
+				   " %s",
+				   tdb->file->fd, (size_t)offset, rw_type,
+				   flags, (size_t)len, strerror(errno));
+		}
+		return TDB_ERR_LOCK;
+	}
+	return TDB_SUCCESS;
+}
+/* Release a byte-range lock.  A no-op returning TDB_SUCCESS under
+ * TDB_NOLOCK; fails with TDB_ERR_LOCK if a fork is detected or the
+ * low-level unlock fails. */
+static enum TDB_ERROR tdb_brunlock(struct tdb_context *tdb,
+				   int rw_type, tdb_off_t offset, size_t len)
+{
+	if (tdb->flags & TDB_NOLOCK) {
+		return TDB_SUCCESS;
+	}
+
+	if (!check_lock_pid(tdb, "tdb_brunlock", true))
+		return TDB_ERR_LOCK;
+
+	if (unlock(tdb, rw_type, offset, len) == -1) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_brunlock failed (fd=%d) at offset %zu"
+				  " rw_type=%d len=%zu: %s",
+				  tdb->file->fd, (size_t)offset, rw_type,
+				  (size_t)len, strerror(errno));
+	}
+	return TDB_SUCCESS;
+}
+
+/*
+  upgrade a read lock to a write lock. This needs to be handled in a
+  special way as some OSes (such as solaris) have too conservative
+  deadlock detection and claim a deadlock when progress can be
+  made. For those OSes we may loop for a while. 
+
+  Requires exactly one upgradable all-record lock held by this tdb.
+  Retries up to 1000 times while the lock attempt fails with EDEADLK.
+*/
+enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb)
+{
+	int count = 1000;
+
+	/* Report fork detection under this function's own name, as the
+	 * other lock routines in this file do. */
+	if (!check_lock_pid(tdb, "tdb_allrecord_upgrade", true))
+		return TDB_ERR_LOCK;
+
+	if (tdb->file->allrecord_lock.count != 1) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_allrecord_upgrade failed:"
+				  " count %u too high",
+				  tdb->file->allrecord_lock.count);
+	}
+
+	/* allrecord_lock.off is set to 1 only for upgradable locks. */
+	if (tdb->file->allrecord_lock.off != 1) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_allrecord_upgrade failed:"
+				  " already upgraded?");
+	}
+
+	if (tdb->file->allrecord_lock.owner != tdb) {
+		return owner_conflict(tdb, "tdb_allrecord_upgrade");
+	}
+
+	while (count--) {
+		struct timeval tv;
+		if (tdb_brlock(tdb, F_WRLCK,
+			       TDB_HASH_LOCK_START, 0,
+			       TDB_LOCK_WAIT|TDB_LOCK_PROBE) == TDB_SUCCESS) {
+			tdb->file->allrecord_lock.ltype = F_WRLCK;
+			tdb->file->allrecord_lock.off = 0;
+			return TDB_SUCCESS;
+		}
+		if (errno != EDEADLK) {
+			break;
+		}
+		/* sleep for as short a time as we can - more portable than usleep() */
+		tv.tv_sec = 0;
+		tv.tv_usec = 1;
+		select(0, NULL, NULL, NULL, &tv);
+	}
+
+	if (errno != EAGAIN && errno != EINTR)
+		tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+			   "tdb_allrecord_upgrade failed");
+	return TDB_ERR_LOCK;
+}
+
+/* Find the nested-lock record for offset.  If owner is non-NULL and the
+ * record at that offset belongs to a different tdb, NULL is returned;
+ * NULL is also returned when no record has that offset. */
+static struct tdb_lock *find_nestlock(struct tdb_context *tdb, tdb_off_t offset,
+				      const struct tdb_context *owner)
+{
+	unsigned int i;
+
+	for (i=0; i<tdb->file->num_lockrecs; i++) {
+		if (tdb->file->lockrecs[i].off == offset) {
+			if (owner && tdb->file->lockrecs[i].owner != owner)
+				return NULL;
+			return &tdb->file->lockrecs[i];
+		}
+	}
+	return NULL;
+}
+
+/* Take the all-record and open locks for writing (without the recovery
+ * check, which would recurse), run tdb_transaction_recover(), then drop
+ * both locks.  Returns the first failure, or the recovery result. */
+enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb)
+{
+	enum TDB_ERROR ecode;
+
+	/* Report fork detection under this function's own name. */
+	if (!check_lock_pid(tdb, "tdb_lock_and_recover", true))
+		return TDB_ERR_LOCK;
+
+	ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK,
+				   false);
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+
+	ecode = tdb_lock_open(tdb, F_WRLCK, 
TDB_LOCK_WAIT|TDB_LOCK_NOCHECK);
+	if (ecode != TDB_SUCCESS) {
+		tdb_allrecord_unlock(tdb, F_WRLCK);
+		return ecode;
+	}
+	ecode = tdb_transaction_recover(tdb);
+	tdb_unlock_open(tdb, F_WRLCK);
+	tdb_allrecord_unlock(tdb, F_WRLCK);
+
+	return ecode;
+}
+
+/* lock an offset in the database.
+ *
+ * Locks are counted per offset: a repeat request for an offset this tdb
+ * already holds only bumps the count, except that asking for a write
+ * lock over an existing read lock is an error.  The first lock taken
+ * (unless TDB_LOCK_NOCHECK is in flags) also checks whether the
+ * database needs recovery. */
+static enum TDB_ERROR tdb_nest_lock(struct tdb_context *tdb,
+				    tdb_off_t offset, int ltype,
+				    enum tdb_lock_flags flags)
+{
+	struct tdb_lock *new_lck;
+	enum TDB_ERROR ecode;
+
+	if (offset > (TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE
+		      + tdb->file->map_size / 8)) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_nest_lock: invalid offset %zu ltype=%d",
+				  (size_t)offset, ltype);
+	}
+
+	if (tdb->flags & TDB_NOLOCK)
+		return TDB_SUCCESS;
+
+	if (!check_lock_pid(tdb, "tdb_nest_lock", true)) {
+		return TDB_ERR_LOCK;
+	}
+
+	tdb->stats.locks++;
+
+	new_lck = find_nestlock(tdb, offset, NULL);
+	if (new_lck) {
+		if (new_lck->owner != tdb) {
+			return owner_conflict(tdb, "tdb_nest_lock");
+		}
+
+		if (new_lck->ltype == F_RDLCK && ltype == F_WRLCK) {
+			return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+					  "tdb_nest_lock:"
+					  " offset %zu has read lock",
+					  (size_t)offset);
+		}
+		/* Just increment the struct, posix locks don't stack. 
*/
+		new_lck->count++;
+		return TDB_SUCCESS;
+	}
+
+#if 0
+	if (tdb->file->num_lockrecs
+	    && offset >= TDB_HASH_LOCK_START
+	    && offset < TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_nest_lock: already have a hash lock?");
+	}
+#endif
+
+	/* Make room for one more record before taking the lock. */
+	new_lck = (struct tdb_lock *)realloc(
+		tdb->file->lockrecs,
+		sizeof(*tdb->file->lockrecs) * (tdb->file->num_lockrecs+1));
+	if (new_lck == NULL) {
+		return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR,
+				  "tdb_nest_lock:"
+				  " unable to allocate %zu lock struct",
+				  tdb->file->num_lockrecs + 1);
+	}
+	tdb->file->lockrecs = new_lck;
+
+	/* Since fcntl locks don't nest, we do a lock for the first one,
+	   and simply bump the count for future ones */
+	ecode = tdb_brlock(tdb, ltype, offset, 1, flags);
+	if (ecode != TDB_SUCCESS) {
+		return ecode;
+	}
+
+	/* First time we grab a lock, perhaps someone died in commit? */
+	if (!(flags & TDB_LOCK_NOCHECK)
+	    && tdb->file->num_lockrecs == 0) {
+		tdb_bool_err berr = tdb_needs_recovery(tdb);
+		if (berr != false) {
+			/* Drop our lock, recover, then take it again. */
+			tdb_brunlock(tdb, ltype, offset, 1);
+
+			if (berr < 0)
+				return berr;
+			ecode = tdb_lock_and_recover(tdb);
+			if (ecode == TDB_SUCCESS) {
+				ecode = tdb_brlock(tdb, ltype, offset, 1,
+						   flags);
+			}
+			if (ecode != TDB_SUCCESS) {
+				return ecode;
+			}
+		}
+	}
+
+	tdb->file->lockrecs[tdb->file->num_lockrecs].owner = tdb;
+	tdb->file->lockrecs[tdb->file->num_lockrecs].off = offset;
+	tdb->file->lockrecs[tdb->file->num_lockrecs].count = 1;
+	tdb->file->lockrecs[tdb->file->num_lockrecs].ltype = ltype;
+	tdb->file->num_lockrecs++;
+
+	return TDB_SUCCESS;
+}
+/* Drop one reference on the nested lock at off.  The underlying lock is
+ * only released when the last reference goes.  A no-op under TDB_NOLOCK. */
+static enum TDB_ERROR tdb_nest_unlock(struct tdb_context *tdb,
+				      tdb_off_t off, int ltype)
+{
+	struct tdb_lock *lck;
+	enum TDB_ERROR ecode;
+
+	if (tdb->flags & TDB_NOLOCK)
+		return TDB_SUCCESS;
+
+	lck = find_nestlock(tdb, off, tdb);
+	if ((lck == NULL) || (lck->count == 0)) {
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_nest_unlock: no lock for %zu",
+				  
(size_t)off);
+	}
+
+	if (lck->count > 1) {
+		lck->count--;
+		return TDB_SUCCESS;
+	}
+
+	/*
+	 * This lock has count==1 left, so we need to unlock it in the
+	 * kernel. We don't bother with decrementing the in-memory array
+	 * element, we're about to overwrite it with the last array element
+	 * anyway.
+	 */
+	ecode = tdb_brunlock(tdb, ltype, off, 1);
+
+	/*
+	 * Shrink the array by overwriting the element just unlocked with the
+	 * last array element.
+	 */
+	*lck = tdb->file->lockrecs[--tdb->file->num_lockrecs];
+
+	return ecode;
+}
+
+/*
+  get the transaction lock
+ */
+enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype)
+{
+	return tdb_nest_lock(tdb, TDB_TRANSACTION_LOCK, ltype, TDB_LOCK_WAIT);
+}
+
+/*
+  release the transaction lock
+ */
+void tdb_transaction_unlock(struct tdb_context *tdb, int ltype)
+{
+	tdb_nest_unlock(tdb, TDB_TRANSACTION_LOCK, ltype);
+}
+
+/* We only need to lock individual bytes, but Linux merges consecutive locks
+ * so we lock in contiguous ranges.
+ *
+ * Tries the whole range [off, off + len) non-blocking first; if that
+ * fails, splits it in two and recurses on each half.  A range of one
+ * byte is locked with the caller's flags. */
+static enum TDB_ERROR tdb_lock_gradual(struct tdb_context *tdb,
+				       int ltype, enum tdb_lock_flags flags,
+				       tdb_off_t off, tdb_off_t len)
+{
+	enum TDB_ERROR ecode;
+	enum tdb_lock_flags nb_flags = (flags & ~TDB_LOCK_WAIT);
+
+	if (len <= 1) {
+		/* 0 would mean to end-of-file... */
+		assert(len != 0);
+		/* Single hash. Just do blocking lock. */
+		return tdb_brlock(tdb, ltype, off, len, flags);
+	}
+
+	/* First we try non-blocking. */
+	if (tdb_brlock(tdb, ltype, off, len, nb_flags) == TDB_SUCCESS) {
+		return TDB_SUCCESS;
+	}
+
+	/* Try locking first half, then second. */
+	ecode = tdb_lock_gradual(tdb, ltype, flags, off, len / 2);
+	if (ecode != TDB_SUCCESS)
+		return ecode;
+
+	ecode = tdb_lock_gradual(tdb, ltype, flags,
+				 off + len / 2, len - len / 2);
+	if (ecode != TDB_SUCCESS) {
+		/* Don't keep half a lock. */
+		tdb_brunlock(tdb, ltype, off, len / 2);
+	}
+	return ecode;
+}
+
+/* lock/unlock entire database. It can only be upgradable if you have some
+ * other way of guaranteeing exclusivity (ie. 
transaction write lock).
+ *
+ * Nested calls by the same tdb only bump a count, provided the request
+ * is a read lock or a write lock is already held.  Unless
+ * TDB_LOCK_NOCHECK is in flags, the database is checked for a pending
+ * recovery once the lock is held. */
+enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype,
+				  enum tdb_lock_flags flags, bool upgradable)
+{
+	enum TDB_ERROR ecode;
+	tdb_bool_err berr;
+
+	if (tdb->flags & TDB_NOLOCK)
+		return TDB_SUCCESS;
+
+	if (!check_lock_pid(tdb, "tdb_allrecord_lock", true)) {
+		return TDB_ERR_LOCK;
+	}
+
+	if (tdb->file->allrecord_lock.count) {
+		if (tdb->file->allrecord_lock.owner != tdb) {
+			return owner_conflict(tdb, "tdb_allrecord_lock");
+		}
+
+		if (ltype == F_RDLCK
+		    || tdb->file->allrecord_lock.ltype == F_WRLCK) {
+			tdb->file->allrecord_lock.count++;
+			return TDB_SUCCESS;
+		}
+
+		/* a global lock of a different type exists */
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
+				  "tdb_allrecord_lock: already have %s lock",
+				  tdb->file->allrecord_lock.ltype == F_RDLCK
+				  ? "read" : "write");
+	}
+
+	if (tdb_has_hash_locks(tdb)) {
+		/* can't combine global and chain locks */
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR,
+				  "tdb_allrecord_lock:"
+				  " already have chain lock");
+	}
+
+	if (upgradable && ltype != F_RDLCK) {
+		/* tdb error: you can't upgrade a write lock! */
+		return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR,
+				  "tdb_allrecord_lock:"
+				  " can't upgrade a write lock");
+	}
+
+	tdb->stats.locks++;
+again:
+	/* Lock hashes, gradually. */
+	ecode = tdb_lock_gradual(tdb, ltype, flags, TDB_HASH_LOCK_START,
+				 TDB_HASH_LOCK_RANGE);
+	if (ecode != TDB_SUCCESS)
+		return ecode;
+
+	/* Lock free tables: there to end of file. */
+	ecode = tdb_brlock(tdb, ltype,
+			   TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE,
+			   0, flags);
+	if (ecode != TDB_SUCCESS) {
+		tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START,
+			     TDB_HASH_LOCK_RANGE);
+		return ecode;
+	}
+
+	tdb->file->allrecord_lock.owner = tdb;
+	tdb->file->allrecord_lock.count = 1;
+	/* If it's upgradable, it's actually exclusive so we can treat
+	 * it as a write lock. */
+	tdb->file->allrecord_lock.ltype = upgradable ? 
F_WRLCK : ltype; + tdb->file->allrecord_lock.off = upgradable; + + /* Now check for needing recovery. */ + if (flags & TDB_LOCK_NOCHECK) + return TDB_SUCCESS; + + berr = tdb_needs_recovery(tdb); + if (likely(berr == false)) + return TDB_SUCCESS; + + tdb_allrecord_unlock(tdb, ltype); + if (berr < 0) + return berr; + ecode = tdb_lock_and_recover(tdb); + if (ecode != TDB_SUCCESS) { + return ecode; + } + goto again; +} + +enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, + int ltype, enum tdb_lock_flags flags) +{ + return tdb_nest_lock(tdb, TDB_OPEN_LOCK, ltype, flags); +} + +void tdb_unlock_open(struct tdb_context *tdb, int ltype) +{ + tdb_nest_unlock(tdb, TDB_OPEN_LOCK, ltype); +} + +bool tdb_has_open_lock(struct tdb_context *tdb) +{ + return !(tdb->flags & TDB_NOLOCK) + && find_nestlock(tdb, TDB_OPEN_LOCK, tdb) != NULL; +} + +enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype) +{ + /* Lock doesn't protect data, so don't check (we recurse if we do!) */ + return tdb_nest_lock(tdb, TDB_EXPANSION_LOCK, ltype, + TDB_LOCK_WAIT | TDB_LOCK_NOCHECK); +} + +void tdb_unlock_expand(struct tdb_context *tdb, int ltype) +{ + tdb_nest_unlock(tdb, TDB_EXPANSION_LOCK, ltype); +} + +/* unlock entire db */ +void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype) +{ + if (tdb->flags & TDB_NOLOCK) + return; + + if (tdb->file->allrecord_lock.count == 0) { + tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR, + "tdb_allrecord_unlock: not locked!"); + return; + } + + if (tdb->file->allrecord_lock.owner != tdb) { + tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR, + "tdb_allrecord_unlock: not locked by us!"); + return; + } + + /* Upgradable locks are marked as write locks. */ + if (tdb->file->allrecord_lock.ltype != ltype + && (!tdb->file->allrecord_lock.off || ltype != F_RDLCK)) { + tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_allrecord_unlock: have %s lock", + tdb->file->allrecord_lock.ltype == F_RDLCK + ? 
"read" : "write"); + return; + } + + if (tdb->file->allrecord_lock.count > 1) { + tdb->file->allrecord_lock.count--; + return; + } + + tdb->file->allrecord_lock.count = 0; + tdb->file->allrecord_lock.ltype = 0; + + tdb_brunlock(tdb, ltype, TDB_HASH_LOCK_START, 0); +} + +bool tdb_has_expansion_lock(struct tdb_context *tdb) +{ + return find_nestlock(tdb, TDB_EXPANSION_LOCK, tdb) != NULL; +} + +bool tdb_has_hash_locks(struct tdb_context *tdb) +{ + unsigned int i; + + for (i=0; i<tdb->file->num_lockrecs; i++) { + if (tdb->file->lockrecs[i].off >= TDB_HASH_LOCK_START + && tdb->file->lockrecs[i].off < (TDB_HASH_LOCK_START + + TDB_HASH_LOCK_RANGE)) + return true; + } + return false; +} + +static bool tdb_has_free_lock(struct tdb_context *tdb) +{ + unsigned int i; + + if (tdb->flags & TDB_NOLOCK) + return false; + + for (i=0; i<tdb->file->num_lockrecs; i++) { + if (tdb->file->lockrecs[i].off + > TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE) + return true; + } + return false; +} + +enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb, + tdb_off_t hash_lock, + tdb_len_t hash_range, + int ltype, enum tdb_lock_flags waitflag) +{ + /* FIXME: Do this properly, using hlock_range */ + unsigned l = TDB_HASH_LOCK_START + + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS)); + + /* a allrecord lock allows us to avoid per chain locks */ + if (tdb->file->allrecord_lock.count) { + if (!check_lock_pid(tdb, "tdb_lock_hashes", true)) + return TDB_ERR_LOCK; + + if (tdb->file->allrecord_lock.owner != tdb) + return owner_conflict(tdb, "tdb_lock_hashes"); + if (ltype == tdb->file->allrecord_lock.ltype + || ltype == F_RDLCK) { + return TDB_SUCCESS; + } + + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_USE_ERROR, + "tdb_lock_hashes:" + " already have %s allrecordlock", + tdb->file->allrecord_lock.ltype == F_RDLCK + ? 
"read" : "write"); + } + + if (tdb_has_free_lock(tdb)) { + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_lock_hashes: already have free lock"); + } + + if (tdb_has_expansion_lock(tdb)) { + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_lock_hashes:" + " already have expansion lock"); + } + + return tdb_nest_lock(tdb, l, ltype, waitflag); +} + +enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb, + tdb_off_t hash_lock, + tdb_len_t hash_range, int ltype) +{ + unsigned l = TDB_HASH_LOCK_START + + (hash_lock >> (64 - TDB_HASH_LOCK_RANGE_BITS)); + + if (tdb->flags & TDB_NOLOCK) + return 0; + + /* a allrecord lock allows us to avoid per chain locks */ + if (tdb->file->allrecord_lock.count) { + if (tdb->file->allrecord_lock.ltype == F_RDLCK + && ltype == F_WRLCK) { + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_unlock_hashes RO allrecord!"); + } + return TDB_SUCCESS; + } + + return tdb_nest_unlock(tdb, l, ltype); +} + +/* Hash locks use TDB_HASH_LOCK_START + the next 30 bits. + * Then we begin; bucket offsets are sizeof(tdb_len_t) apart, so we divide. + * The result is that on 32 bit systems we don't use lock values > 2^31 on + * files that are less than 4GB. 
+ */ +static tdb_off_t free_lock_off(tdb_off_t b_off) +{ + return TDB_HASH_LOCK_START + TDB_HASH_LOCK_RANGE + + b_off / sizeof(tdb_off_t); +} + +enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off, + enum tdb_lock_flags waitflag) +{ + assert(b_off >= sizeof(struct tdb_header)); + + if (tdb->flags & TDB_NOLOCK) + return 0; + + /* a allrecord lock allows us to avoid per chain locks */ + if (tdb->file->allrecord_lock.count) { + if (!check_lock_pid(tdb, "tdb_lock_free_bucket", true)) + return TDB_ERR_LOCK; + + if (tdb->file->allrecord_lock.ltype == F_WRLCK) + return 0; + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_lock_free_bucket with" + " read-only allrecordlock!"); + } + +#if 0 /* FIXME */ + if (tdb_has_expansion_lock(tdb)) { + return tdb_logerr(tdb, TDB_ERR_LOCK, TDB_LOG_ERROR, + "tdb_lock_free_bucket:" + " already have expansion lock"); + } +#endif + + return tdb_nest_lock(tdb, free_lock_off(b_off), F_WRLCK, waitflag); +} + +void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off) +{ + if (tdb->file->allrecord_lock.count) + return; + + tdb_nest_unlock(tdb, free_lock_off(b_off), F_WRLCK); +} + +enum TDB_ERROR tdb_lockall(struct tdb_context *tdb) +{ + return tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); +} + +void tdb_unlockall(struct tdb_context *tdb) +{ + tdb_allrecord_unlock(tdb, F_WRLCK); +} + +enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb) +{ + return tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false); +} + +void tdb_unlockall_read(struct tdb_context *tdb) +{ + tdb_allrecord_unlock(tdb, F_RDLCK); +} + +void tdb_lock_cleanup(struct tdb_context *tdb) +{ + unsigned int i; + + /* We don't want to warn: they're allowed to close tdb after fork. 
*/ + if (!check_lock_pid(tdb, "tdb_close", false)) + return; + + while (tdb->file->allrecord_lock.count + && tdb->file->allrecord_lock.owner == tdb) { + tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype); + } + + for (i=0; i<tdb->file->num_lockrecs; i++) { + if (tdb->file->lockrecs[i].owner == tdb) { + tdb_nest_unlock(tdb, + tdb->file->lockrecs[i].off, + tdb->file->lockrecs[i].ltype); + i--; + } + } +} diff --git a/lib/tdb2/open.c b/lib/tdb2/open.c new file mode 100644 index 0000000000..c35598cdcc --- /dev/null +++ b/lib/tdb2/open.c @@ -0,0 +1,661 @@ +#include "private.h" +#include <ccan/hash/hash.h> +#include <assert.h> + +/* all lock info, to detect double-opens (fcntl file don't nest!) */ +static struct tdb_file *files = NULL; + +static struct tdb_file *find_file(dev_t device, ino_t ino) +{ + struct tdb_file *i; + + for (i = files; i; i = i->next) { + if (i->device == device && i->inode == ino) { + i->refcnt++; + break; + } + } + return i; +} + +static bool read_all(int fd, void *buf, size_t len) +{ + while (len) { + ssize_t ret; + ret = read(fd, buf, len); + if (ret < 0) + return false; + if (ret == 0) { + /* ETOOSHORT? */ + errno = EWOULDBLOCK; + return false; + } + buf = (char *)buf + ret; + len -= ret; + } + return true; +} + +static uint64_t random_number(struct tdb_context *tdb) +{ + int fd; + uint64_t ret = 0; + struct timeval now; + + fd = open("/dev/urandom", O_RDONLY); + if (fd >= 0) { + if (read_all(fd, &ret, sizeof(ret))) { + close(fd); + return ret; + } + close(fd); + } + /* FIXME: Untested! Based on Wikipedia protocol description! */ + fd = open("/dev/egd-pool", O_RDWR); + if (fd >= 0) { + /* Command is 1, next byte is size we want to read. */ + char cmd[2] = { 1, sizeof(uint64_t) }; + if (write(fd, cmd, sizeof(cmd)) == sizeof(cmd)) { + char reply[1 + sizeof(uint64_t)]; + int r = read(fd, reply, sizeof(reply)); + if (r > 1) { + /* Copy at least some bytes. 
*/ + memcpy(&ret, reply+1, r - 1); + if (reply[0] == sizeof(uint64_t) + && r == sizeof(reply)) { + close(fd); + return ret; + } + } + } + close(fd); + } + + /* Fallback: pid and time. */ + gettimeofday(&now, NULL); + ret = getpid() * 100132289ULL + now.tv_sec * 1000000ULL + now.tv_usec; + tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING, + "tdb_open: random from getpid and time"); + return ret; +} + +struct new_database { + struct tdb_header hdr; + struct tdb_freetable ftable; +}; + +/* initialise a new database */ +static enum TDB_ERROR tdb_new_database(struct tdb_context *tdb, + struct tdb_attribute_seed *seed, + struct tdb_header *hdr) +{ + /* We make it up in memory, then write it out if not internal */ + struct new_database newdb; + unsigned int magic_len; + ssize_t rlen; + enum TDB_ERROR ecode; + + /* Fill in the header */ + newdb.hdr.version = TDB_VERSION; + if (seed) + newdb.hdr.hash_seed = seed->seed; + else + newdb.hdr.hash_seed = random_number(tdb); + newdb.hdr.hash_test = TDB_HASH_MAGIC; + newdb.hdr.hash_test = tdb->hash_fn(&newdb.hdr.hash_test, + sizeof(newdb.hdr.hash_test), + newdb.hdr.hash_seed, + tdb->hash_data); + newdb.hdr.recovery = 0; + newdb.hdr.features_used = newdb.hdr.features_offered = TDB_FEATURE_MASK; + newdb.hdr.seqnum = 0; + memset(newdb.hdr.reserved, 0, sizeof(newdb.hdr.reserved)); + /* Initial hashes are empty. */ + memset(newdb.hdr.hashtable, 0, sizeof(newdb.hdr.hashtable)); + + /* Free is empty. 
*/ + newdb.hdr.free_table = offsetof(struct new_database, ftable); + memset(&newdb.ftable, 0, sizeof(newdb.ftable)); + ecode = set_header(NULL, &newdb.ftable.hdr, TDB_FTABLE_MAGIC, 0, + sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr), + sizeof(newdb.ftable) - sizeof(newdb.ftable.hdr), + 0); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* Magic food */ + memset(newdb.hdr.magic_food, 0, sizeof(newdb.hdr.magic_food)); + strcpy(newdb.hdr.magic_food, TDB_MAGIC_FOOD); + + /* This creates an endian-converted database, as if read from disk */ + magic_len = sizeof(newdb.hdr.magic_food); + tdb_convert(tdb, + (char *)&newdb.hdr + magic_len, sizeof(newdb) - magic_len); + + *hdr = newdb.hdr; + + if (tdb->flags & TDB_INTERNAL) { + tdb->file->map_size = sizeof(newdb); + tdb->file->map_ptr = malloc(tdb->file->map_size); + if (!tdb->file->map_ptr) { + return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_new_database:" + " failed to allocate"); + } + memcpy(tdb->file->map_ptr, &newdb, tdb->file->map_size); + return TDB_SUCCESS; + } + if (lseek(tdb->file->fd, 0, SEEK_SET) == -1) { + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_new_database:" + " failed to seek: %s", strerror(errno)); + } + + if (ftruncate(tdb->file->fd, 0) == -1) { + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_new_database:" + " failed to truncate: %s", strerror(errno)); + } + + rlen = write(tdb->file->fd, &newdb, sizeof(newdb)); + if (rlen != sizeof(newdb)) { + if (rlen >= 0) + errno = ENOSPC; + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_new_database: %zi writing header: %s", + rlen, strerror(errno)); + } + return TDB_SUCCESS; +} + +static enum TDB_ERROR tdb_new_file(struct tdb_context *tdb) +{ + tdb->file = malloc(sizeof(*tdb->file)); + if (!tdb->file) + return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_open: cannot alloc tdb_file structure"); + tdb->file->num_lockrecs = 0; + tdb->file->lockrecs = NULL; + tdb->file->allrecord_lock.count = 0; + 
tdb->file->refcnt = 1; + return TDB_SUCCESS; +} + +enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb, + const union tdb_attribute *attr) +{ + switch (attr->base.attr) { + case TDB_ATTRIBUTE_LOG: + tdb->log_fn = attr->log.fn; + tdb->log_data = attr->log.data; + break; + case TDB_ATTRIBUTE_HASH: + case TDB_ATTRIBUTE_SEED: + case TDB_ATTRIBUTE_OPENHOOK: + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_set_attribute:" + " cannot set %s after opening", + attr->base.attr == TDB_ATTRIBUTE_HASH + ? "TDB_ATTRIBUTE_HASH" + : attr->base.attr == TDB_ATTRIBUTE_SEED + ? "TDB_ATTRIBUTE_SEED" + : "TDB_ATTRIBUTE_OPENHOOK"); + case TDB_ATTRIBUTE_STATS: + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_set_attribute:" + " cannot set TDB_ATTRIBUTE_STATS"); + case TDB_ATTRIBUTE_FLOCK: + tdb->lock_fn = attr->flock.lock; + tdb->unlock_fn = attr->flock.unlock; + tdb->lock_data = attr->flock.data; + break; + default: + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_set_attribute:" + " unknown attribute type %u", + attr->base.attr); + } + return TDB_SUCCESS; +} + +static uint64_t jenkins_hash(const void *key, size_t length, uint64_t seed, + void *unused) +{ + uint64_t ret; + /* hash64_stable assumes lower bits are more important; they are a + * slightly better hash. We use the upper bits first, so swap them. 
*/ + ret = hash64_stable((const unsigned char *)key, length, seed); + return (ret >> 32) | (ret << 32); +} + +enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb, + union tdb_attribute *attr) +{ + switch (attr->base.attr) { + case TDB_ATTRIBUTE_LOG: + if (!tdb->log_fn) + return tdb->last_error = TDB_ERR_NOEXIST; + attr->log.fn = tdb->log_fn; + attr->log.data = tdb->log_data; + break; + case TDB_ATTRIBUTE_HASH: + attr->hash.fn = tdb->hash_fn; + attr->hash.data = tdb->hash_data; + break; + case TDB_ATTRIBUTE_SEED: + attr->seed.seed = tdb->hash_seed; + break; + case TDB_ATTRIBUTE_OPENHOOK: + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_get_attribute:" + " cannot get TDB_ATTRIBUTE_OPENHOOK"); + case TDB_ATTRIBUTE_STATS: { + size_t size = attr->stats.size; + if (size > tdb->stats.size) + size = tdb->stats.size; + memcpy(&attr->stats, &tdb->stats, size); + break; + } + case TDB_ATTRIBUTE_FLOCK: + attr->flock.lock = tdb->lock_fn; + attr->flock.unlock = tdb->unlock_fn; + attr->flock.data = tdb->lock_data; + break; + default: + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_get_attribute:" + " unknown attribute type %u", + attr->base.attr); + } + attr->base.next = NULL; + return TDB_SUCCESS; +} + +void tdb_unset_attribute(struct tdb_context *tdb, + enum tdb_attribute_type type) +{ + switch (type) { + case TDB_ATTRIBUTE_LOG: + tdb->log_fn = NULL; + break; + case TDB_ATTRIBUTE_HASH: + case TDB_ATTRIBUTE_SEED: + case TDB_ATTRIBUTE_OPENHOOK: + tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_unset_attribute: cannot unset %s after opening", + type == TDB_ATTRIBUTE_HASH + ? "TDB_ATTRIBUTE_HASH" + : type == TDB_ATTRIBUTE_SEED + ? 
"TDB_ATTRIBUTE_SEED" + : "TDB_ATTRIBUTE_OPENHOOK"); + break; + case TDB_ATTRIBUTE_STATS: + tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_unset_attribute:" + "cannot unset TDB_ATTRIBUTE_STATS"); + break; + case TDB_ATTRIBUTE_FLOCK: + tdb->lock_fn = tdb_fcntl_lock; + tdb->unlock_fn = tdb_fcntl_unlock; + break; + default: + tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_unset_attribute: unknown attribute type %u", + type); + } +} + +struct tdb_context *tdb_open(const char *name, int tdb_flags, + int open_flags, mode_t mode, + union tdb_attribute *attr) +{ + struct tdb_context *tdb; + struct stat st; + int saved_errno = 0; + uint64_t hash_test; + unsigned v; + ssize_t rlen; + struct tdb_header hdr; + struct tdb_attribute_seed *seed = NULL; + struct tdb_attribute_openhook *openhook = NULL; + tdb_bool_err berr; + enum TDB_ERROR ecode; + int openlock; + + tdb = malloc(sizeof(*tdb) + (name ? strlen(name) + 1 : 0)); + if (!tdb) { + /* Can't log this */ + errno = ENOMEM; + return NULL; + } + /* Set name immediately for logging functions. */ + if (name) { + tdb->name = strcpy((char *)(tdb + 1), name); + } else { + tdb->name = NULL; + } + tdb->direct_access = 0; + tdb->flags = tdb_flags; + tdb->log_fn = NULL; + tdb->transaction = NULL; + tdb->access = NULL; + tdb->last_error = TDB_SUCCESS; + tdb->file = NULL; + tdb->lock_fn = tdb_fcntl_lock; + tdb->unlock_fn = tdb_fcntl_unlock; + tdb->hash_fn = jenkins_hash; + memset(&tdb->stats, 0, sizeof(tdb->stats)); + tdb->stats.base.attr = TDB_ATTRIBUTE_STATS; + tdb->stats.size = sizeof(tdb->stats); + tdb_io_init(tdb); + + while (attr) { + switch (attr->base.attr) { + case TDB_ATTRIBUTE_HASH: + tdb->hash_fn = attr->hash.fn; + tdb->hash_data = attr->hash.data; + break; + case TDB_ATTRIBUTE_SEED: + seed = &attr->seed; + break; + case TDB_ATTRIBUTE_OPENHOOK: + openhook = &attr->openhook; + break; + default: + /* These are set as normal. 
*/ + ecode = tdb_set_attribute(tdb, attr); + if (ecode != TDB_SUCCESS) + goto fail; + } + attr = attr->base.next; + } + + if (tdb_flags & ~(TDB_INTERNAL | TDB_NOLOCK | TDB_NOMMAP | TDB_CONVERT + | TDB_NOSYNC | TDB_SEQNUM | TDB_ALLOW_NESTING)) { + ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_open: unknown flags %u", tdb_flags); + goto fail; + } + + if ((open_flags & O_ACCMODE) == O_WRONLY) { + ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_open: can't open tdb %s write-only", + name); + goto fail; + } + + if ((open_flags & O_ACCMODE) == O_RDONLY) { + tdb->read_only = true; + tdb->mmap_flags = PROT_READ; + openlock = F_RDLCK; + } else { + tdb->read_only = false; + tdb->mmap_flags = PROT_READ | PROT_WRITE; + openlock = F_WRLCK; + } + + /* internal databases don't need any of the rest. */ + if (tdb->flags & TDB_INTERNAL) { + tdb->flags |= (TDB_NOLOCK | TDB_NOMMAP); + ecode = tdb_new_file(tdb); + if (ecode != TDB_SUCCESS) { + goto fail; + } + tdb->file->fd = -1; + ecode = tdb_new_database(tdb, seed, &hdr); + if (ecode != TDB_SUCCESS) { + goto fail; + } + tdb_convert(tdb, &hdr.hash_seed, sizeof(hdr.hash_seed)); + tdb->hash_seed = hdr.hash_seed; + tdb_ftable_init(tdb); + return tdb; + } + + if (stat(name, &st) != -1) + tdb->file = find_file(st.st_dev, st.st_ino); + + if (!tdb->file) { + int fd; + + if ((fd = open(name, open_flags, mode)) == -1) { + /* errno set by open(2) */ + saved_errno = errno; + tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open: could not open file %s: %s", + name, strerror(errno)); + goto fail_errno; + } + + /* on exec, don't inherit the fd */ + v = fcntl(fd, F_GETFD, 0); + fcntl(fd, F_SETFD, v | FD_CLOEXEC); + + if (fstat(fd, &st) == -1) { + saved_errno = errno; + tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open: could not stat open %s: %s", + name, strerror(errno)); + close(fd); + goto fail_errno; + } + + ecode = tdb_new_file(tdb); + if (ecode != TDB_SUCCESS) { + close(fd); + goto fail; + } + + 
tdb->file->next = files; + tdb->file->fd = fd; + tdb->file->device = st.st_dev; + tdb->file->inode = st.st_ino; + tdb->file->map_ptr = NULL; + tdb->file->map_size = sizeof(struct tdb_header); + } + + /* ensure there is only one process initialising at once */ + ecode = tdb_lock_open(tdb, openlock, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK); + if (ecode != TDB_SUCCESS) { + saved_errno = errno; + goto fail_errno; + } + + /* call their open hook if they gave us one. */ + if (openhook) { + ecode = openhook->fn(tdb->file->fd, openhook->data); + if (ecode != TDB_SUCCESS) { + tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_open: open hook failed"); + goto fail; + } + open_flags |= O_CREAT; + } + + /* If they used O_TRUNC, read will return 0. */ + rlen = pread(tdb->file->fd, &hdr, sizeof(hdr), 0); + if (rlen == 0 && (open_flags & O_CREAT)) { + ecode = tdb_new_database(tdb, seed, &hdr); + if (ecode != TDB_SUCCESS) { + goto fail; + } + } else if (rlen < 0) { + ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open: error %s reading %s", + strerror(errno), name); + goto fail; + } else if (rlen < sizeof(hdr) + || strcmp(hdr.magic_food, TDB_MAGIC_FOOD) != 0) { + ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open: %s is not a tdb file", name); + goto fail; + } + + if (hdr.version != TDB_VERSION) { + if (hdr.version == bswap_64(TDB_VERSION)) + tdb->flags |= TDB_CONVERT; + else { + /* wrong version */ + ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open:" + " %s is unknown version 0x%llx", + name, (long long)hdr.version); + goto fail; + } + } + + tdb_convert(tdb, &hdr, sizeof(hdr)); + tdb->hash_seed = hdr.hash_seed; + hash_test = TDB_HASH_MAGIC; + hash_test = tdb_hash(tdb, &hash_test, sizeof(hash_test)); + if (hdr.hash_test != hash_test) { + /* wrong hash variant */ + ecode = tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open:" + " %s uses a different hash function", + name); + goto fail; + } + + /* Clear any features we don't understand. 
*/ + if ((open_flags & O_ACCMODE) != O_RDONLY) { + hdr.features_used &= TDB_FEATURE_MASK; + if (tdb_write_convert(tdb, offsetof(struct tdb_header, + features_used), + &hdr.features_used, + sizeof(hdr.features_used)) == -1) + goto fail; + } + + tdb_unlock_open(tdb, openlock); + + /* This make sure we have current map_size and mmap. */ + tdb->methods->oob(tdb, tdb->file->map_size + 1, true); + + /* Now it's fully formed, recover if necessary. */ + berr = tdb_needs_recovery(tdb); + if (unlikely(berr != false)) { + if (berr < 0) { + ecode = berr; + goto fail; + } + ecode = tdb_lock_and_recover(tdb); + if (ecode != TDB_SUCCESS) { + goto fail; + } + } + + ecode = tdb_ftable_init(tdb); + if (ecode != TDB_SUCCESS) { + goto fail; + } + + /* Add to linked list if we're new. */ + if (tdb->file->refcnt == 1) + files = tdb->file; + return tdb; + + fail: + /* Map ecode to some logical errno. */ + switch (ecode) { + case TDB_ERR_CORRUPT: + case TDB_ERR_IO: + saved_errno = EIO; + break; + case TDB_ERR_LOCK: + saved_errno = EWOULDBLOCK; + break; + case TDB_ERR_OOM: + saved_errno = ENOMEM; + break; + case TDB_ERR_EINVAL: + saved_errno = EINVAL; + break; + default: + saved_errno = EINVAL; + break; + } + +fail_errno: +#ifdef TDB_TRACE + close(tdb->tracefd); +#endif + if (tdb->file) { + tdb_lock_cleanup(tdb); + if (--tdb->file->refcnt == 0) { + assert(tdb->file->num_lockrecs == 0); + if (tdb->file->map_ptr) { + if (tdb->flags & TDB_INTERNAL) { + free(tdb->file->map_ptr); + } else + tdb_munmap(tdb->file); + } + if (close(tdb->file->fd) != 0) + tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_open: failed to close tdb fd" + " on error: %s", strerror(errno)); + free(tdb->file->lockrecs); + free(tdb->file); + } + } + + free(tdb); + errno = saved_errno; + return NULL; +} + +int tdb_close(struct tdb_context *tdb) +{ + int ret = 0; + + tdb_trace(tdb, "tdb_close"); + + if (tdb->transaction) { + tdb_transaction_cancel(tdb); + } + + if (tdb->file->map_ptr) { + if (tdb->flags & TDB_INTERNAL) + 
free(tdb->file->map_ptr); + else + tdb_munmap(tdb->file); + } + if (tdb->file) { + struct tdb_file **i; + + tdb_lock_cleanup(tdb); + if (--tdb->file->refcnt == 0) { + ret = close(tdb->file->fd); + + /* Remove from files list */ + for (i = &files; *i; i = &(*i)->next) { + if (*i == tdb->file) { + *i = tdb->file->next; + break; + } + } + free(tdb->file->lockrecs); + free(tdb->file); + } + } + +#ifdef TDB_TRACE + close(tdb->tracefd); +#endif + free(tdb); + + return ret; +} diff --git a/lib/tdb2/private.h b/lib/tdb2/private.h new file mode 100644 index 0000000000..0c3e441657 --- /dev/null +++ b/lib/tdb2/private.h @@ -0,0 +1,624 @@ +#ifndef TDB_PRIVATE_H +#define TDB_PRIVATE_H + /* + Trivial Database 2: private types and prototypes + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "config.h" +#if HAVE_FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 64 +#endif +#include <ccan/likely/likely.h> +#include <ccan/compiler/compiler.h> +#include <ccan/endian/endian.h> +#include "tdb2.h" + +#ifdef _SAMBA_BUILD_ +#include "replace.h" +#include "system/filesys.h" +#include "system/time.h" +#include "system/shmem.h" +#include "system/select.h" +#include "system/wait.h" +#else +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stddef.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <utime.h> +#include <unistd.h> +#endif + +#ifndef TEST_IT +#define TEST_IT(cond) +#endif + +/* #define TDB_TRACE 1 */ + +#ifndef __STRING +#define __STRING(x) #x +#endif + +#ifndef __STRINGSTRING +#define __STRINGSTRING(x) __STRING(x) +#endif + +#ifndef __location__ +#define __location__ __FILE__ ":" __STRINGSTRING(__LINE__) +#endif + +typedef uint64_t tdb_len_t; +typedef uint64_t tdb_off_t; + +#define TDB_MAGIC_FOOD "TDB file\n" +#define TDB_VERSION ((uint64_t)(0x26011967 + 7)) +#define TDB_USED_MAGIC ((uint64_t)0x1999) +#define TDB_HTABLE_MAGIC ((uint64_t)0x1888) +#define TDB_CHAIN_MAGIC ((uint64_t)0x1777) +#define TDB_FTABLE_MAGIC ((uint64_t)0x1666) +#define TDB_FREE_MAGIC ((uint64_t)0xFE) +#define TDB_HASH_MAGIC (0xA1ABE11A01092008ULL) +#define TDB_RECOVERY_MAGIC (0xf53bc0e7ad124589ULL) +#define TDB_RECOVERY_INVALID_MAGIC (0x0ULL) + +#define TDB_OFF_IS_ERR(off) unlikely(off >= (tdb_off_t)TDB_ERR_LAST) + +/* Packing errors into pointers and v.v. */ +#define TDB_PTR_IS_ERR(ptr) \ + unlikely((unsigned long)(ptr) >= (unsigned long)TDB_ERR_LAST) +#define TDB_PTR_ERR(p) ((enum TDB_ERROR)(long)(p)) +#define TDB_ERR_PTR(err) ((void *)(long)(err)) + +/* Common case of returning true, false or -ve error. */ +typedef int tdb_bool_err; + +/* Prevent others from opening the file. */ +#define TDB_OPEN_LOCK 0 +/* Doing a transaction. 
*/ +#define TDB_TRANSACTION_LOCK 1 +/* Expanding file. */ +#define TDB_EXPANSION_LOCK 2 +/* Hash chain locks. */ +#define TDB_HASH_LOCK_START 64 + +/* Range for hash locks. */ +#define TDB_HASH_LOCK_RANGE_BITS 30 +#define TDB_HASH_LOCK_RANGE (1 << TDB_HASH_LOCK_RANGE_BITS) + +/* We have 1024 entries in the top level. */ +#define TDB_TOPLEVEL_HASH_BITS 10 +/* And 64 entries in each sub-level: thus 64 bits exactly after 9 levels. */ +#define TDB_SUBLEVEL_HASH_BITS 6 +/* And 8 entries in each group, ie 8 groups per sublevel. */ +#define TDB_HASH_GROUP_BITS 3 +/* This is currently 10: beyond this we chain. */ +#define TDB_MAX_LEVELS (1+(64-TDB_TOPLEVEL_HASH_BITS) / TDB_SUBLEVEL_HASH_BITS) + +/* Extend file by least 100 times larger than needed. */ +#define TDB_EXTENSION_FACTOR 100 + +/* We steal bits from the offsets to store hash info. */ +#define TDB_OFF_HASH_GROUP_MASK ((1ULL << TDB_HASH_GROUP_BITS) - 1) +/* We steal this many upper bits, giving a maximum offset of 64 exabytes. */ +#define TDB_OFF_UPPER_STEAL 8 +#define TDB_OFF_UPPER_STEAL_EXTRA 7 +/* The bit number where we store extra hash bits. */ +#define TDB_OFF_HASH_EXTRA_BIT 57 +#define TDB_OFF_UPPER_STEAL_SUBHASH_BIT 56 + +/* Additional features we understand. Currently: none. */ +#define TDB_FEATURE_MASK ((uint64_t)0) + +/* The bit number where we store the extra hash bits. */ +/* Convenience mask to get actual offset. */ +#define TDB_OFF_MASK \ + (((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1) - TDB_OFF_HASH_GROUP_MASK) + +/* How many buckets in a free list: see size_to_bucket(). */ +#define TDB_FREE_BUCKETS (64 - TDB_OFF_UPPER_STEAL) + +/* We have to be able to fit a free record here. 
*/ +#define TDB_MIN_DATA_LEN \ + (sizeof(struct tdb_free_record) - sizeof(struct tdb_used_record)) + +/* Indicates this entry is not on an flist (can happen during coalescing) */ +#define TDB_FTABLE_NONE ((1ULL << TDB_OFF_UPPER_STEAL) - 1) + +struct tdb_used_record { + /* For on-disk compatibility, we avoid bitfields: + magic: 16, (highest) + key_len_bits: 5, + extra_padding: 32 + hash_bits: 11 + */ + uint64_t magic_and_meta; + /* The bottom key_len_bits*2 are key length, rest is data length. */ + uint64_t key_and_data_len; +}; + +static inline unsigned rec_key_bits(const struct tdb_used_record *r) +{ + return ((r->magic_and_meta >> 43) & ((1 << 5)-1)) * 2; +} + +static inline uint64_t rec_key_length(const struct tdb_used_record *r) +{ + return r->key_and_data_len & ((1ULL << rec_key_bits(r)) - 1); +} + +static inline uint64_t rec_data_length(const struct tdb_used_record *r) +{ + return r->key_and_data_len >> rec_key_bits(r); +} + +static inline uint64_t rec_extra_padding(const struct tdb_used_record *r) +{ + return (r->magic_and_meta >> 11) & 0xFFFFFFFF; +} + +static inline uint32_t rec_hash(const struct tdb_used_record *r) +{ + return r->magic_and_meta & ((1 << 11) - 1); +} + +static inline uint16_t rec_magic(const struct tdb_used_record *r) +{ + return (r->magic_and_meta >> 48); +} + +struct tdb_free_record { + uint64_t magic_and_prev; /* TDB_OFF_UPPER_STEAL bits magic, then prev */ + uint64_t ftable_and_len; /* Len not counting these two fields. */ + /* This is why the minimum record size is 8 bytes. 
*/ + uint64_t next; +}; + +static inline uint64_t frec_prev(const struct tdb_free_record *f) +{ + return f->magic_and_prev & ((1ULL << (64 - TDB_OFF_UPPER_STEAL)) - 1); +} + +static inline uint64_t frec_magic(const struct tdb_free_record *f) +{ + return f->magic_and_prev >> (64 - TDB_OFF_UPPER_STEAL); +} + +static inline uint64_t frec_len(const struct tdb_free_record *f) +{ + return f->ftable_and_len & ((1ULL << (64 - TDB_OFF_UPPER_STEAL))-1); +} + +static inline unsigned frec_ftable(const struct tdb_free_record *f) +{ + return f->ftable_and_len >> (64 - TDB_OFF_UPPER_STEAL); +} + +struct tdb_recovery_record { + uint64_t magic; + /* Length of record (add this header to get total length). */ + uint64_t max_len; + /* Length used. */ + uint64_t len; + /* Old length of file before transaction. */ + uint64_t eof; +}; + +/* If we bottom out of the subhashes, we chain. */ +struct tdb_chain { + tdb_off_t rec[1 << TDB_HASH_GROUP_BITS]; + tdb_off_t next; +}; + +/* this is stored at the front of every database */ +struct tdb_header { + char magic_food[64]; /* for /etc/magic */ + /* FIXME: Make me 32 bit? */ + uint64_t version; /* version of the code */ + uint64_t hash_test; /* result of hashing HASH_MAGIC. */ + uint64_t hash_seed; /* "random" seed written at creation time. */ + tdb_off_t free_table; /* (First) free table. */ + tdb_off_t recovery; /* Transaction recovery area. */ + + uint64_t features_used; /* Features all writers understand */ + uint64_t features_offered; /* Features offered */ + + uint64_t seqnum; /* Sequence number for TDB_SEQNUM */ + + tdb_off_t reserved[23]; + + /* Top level hash table. */ + tdb_off_t hashtable[1ULL << TDB_TOPLEVEL_HASH_BITS]; +}; + +struct tdb_freetable { + struct tdb_used_record hdr; + tdb_off_t next; + tdb_off_t buckets[TDB_FREE_BUCKETS]; +}; + +/* Information about a particular (locked) hash entry. */ +struct hash_info { + /* Full hash value of entry. */ + uint64_t h; + /* Start and length of lock acquired. 
*/ + tdb_off_t hlock_start; + tdb_len_t hlock_range; + /* Start of hash group. */ + tdb_off_t group_start; + /* Bucket we belong in. */ + unsigned int home_bucket; + /* Bucket we (or an empty space) were found in. */ + unsigned int found_bucket; + /* How many bits of the hash are already used. */ + unsigned int hash_used; + /* Current working group. */ + tdb_off_t group[1 << TDB_HASH_GROUP_BITS]; +}; + +struct traverse_info { + struct traverse_level { + tdb_off_t hashtable; + /* We ignore groups here, and treat it as a big array. */ + unsigned entry; + unsigned int total_buckets; + } levels[TDB_MAX_LEVELS + 1]; + unsigned int num_levels; + unsigned int toplevel_group; + /* This makes delete-everything-inside-traverse work as expected. */ + tdb_off_t prev; +}; + +enum tdb_lock_flags { + /* WAIT == F_SETLKW, NOWAIT == F_SETLK */ + TDB_LOCK_NOWAIT = 0, + TDB_LOCK_WAIT = 1, + /* If set, don't log an error on failure. */ + TDB_LOCK_PROBE = 2, + /* If set, don't check for recovery (used by recovery code). */ + TDB_LOCK_NOCHECK = 4, +}; + +struct tdb_lock { + struct tdb_context *owner; + uint32_t off; + uint32_t count; + uint32_t ltype; +}; + +/* This is only needed for tdb_access_commit, but used everywhere to + * simplify. */ +struct tdb_access_hdr { + struct tdb_access_hdr *next; + tdb_off_t off; + tdb_len_t len; + bool convert; +}; + +struct tdb_file { + /* Single list of all TDBs, to detect multiple opens. */ + struct tdb_file *next; + + /* How many are sharing us? */ + unsigned int refcnt; + + /* Mmap (if any), or malloc (for TDB_INTERNAL). */ + void *map_ptr; + + /* How much space has been mapped (<= current file size) */ + tdb_len_t map_size; + + /* The file descriptor (-1 for TDB_INTERNAL). */ + int fd; + + /* Lock information */ + pid_t locker; + struct tdb_lock allrecord_lock; + size_t num_lockrecs; + struct tdb_lock *lockrecs; + + /* Identity of this file. */ + dev_t device; + ino_t inode; +}; + +struct tdb_context { + /* Filename of the database. 
*/ + const char *name; + + /* Are we accessing directly? (debugging check). */ + int direct_access; + + /* Operating read-only? (Opened O_RDONLY, or in traverse_read) */ + bool read_only; + + /* mmap read only? */ + int mmap_flags; + + /* the flags passed to tdb_open, for tdb_reopen. */ + uint32_t flags; + + /* Logging function */ + void (*log_fn)(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, + void *data); + void *log_data; + + /* Hash function. */ + uint64_t (*hash_fn)(const void *key, size_t len, uint64_t seed, void *); + void *hash_data; + uint64_t hash_seed; + + /* low level (fnctl) lock functions. */ + int (*lock_fn)(int fd, int rw, off_t off, off_t len, bool w, void *); + int (*unlock_fn)(int fd, int rw, off_t off, off_t len, void *); + void *lock_data; + + /* Set if we are in a transaction. */ + struct tdb_transaction *transaction; + + /* What free table are we using? */ + tdb_off_t ftable_off; + unsigned int ftable; + + /* IO methods: changes for transactions. */ + const struct tdb_methods *methods; + + /* Our statistics. */ + struct tdb_attribute_stats stats; + + /* Direct access information */ + struct tdb_access_hdr *access; + + /* Last error we returned. 
*/ + enum TDB_ERROR last_error; + + /* The actual file information */ + struct tdb_file *file; +}; + +struct tdb_methods { + enum TDB_ERROR (*tread)(struct tdb_context *, tdb_off_t, void *, + tdb_len_t); + enum TDB_ERROR (*twrite)(struct tdb_context *, tdb_off_t, const void *, + tdb_len_t); + enum TDB_ERROR (*oob)(struct tdb_context *, tdb_off_t, bool); + enum TDB_ERROR (*expand_file)(struct tdb_context *, tdb_len_t); + void *(*direct)(struct tdb_context *, tdb_off_t, size_t, bool); +}; + +/* + internal prototypes +*/ +/* hash.c: */ +tdb_bool_err first_in_hash(struct tdb_context *tdb, + struct traverse_info *tinfo, + TDB_DATA *kbuf, size_t *dlen); + +tdb_bool_err next_in_hash(struct tdb_context *tdb, + struct traverse_info *tinfo, + TDB_DATA *kbuf, size_t *dlen); + +/* Hash random memory. */ +uint64_t tdb_hash(struct tdb_context *tdb, const void *ptr, size_t len); + +/* Hash on disk. */ +uint64_t hash_record(struct tdb_context *tdb, tdb_off_t off); + +/* Find and lock a hash entry (or where it would be). */ +tdb_off_t find_and_lock(struct tdb_context *tdb, + struct tdb_data key, + int ltype, + struct hash_info *h, + struct tdb_used_record *rec, + struct traverse_info *tinfo); + +enum TDB_ERROR replace_in_hash(struct tdb_context *tdb, + struct hash_info *h, + tdb_off_t new_off); + +enum TDB_ERROR add_to_hash(struct tdb_context *tdb, struct hash_info *h, + tdb_off_t new_off); + +enum TDB_ERROR delete_from_hash(struct tdb_context *tdb, struct hash_info *h); + +/* For tdb_check */ +bool is_subhash(tdb_off_t val); + +/* free.c: */ +enum TDB_ERROR tdb_ftable_init(struct tdb_context *tdb); + +/* check.c needs these to iterate through free lists. */ +tdb_off_t first_ftable(struct tdb_context *tdb); +tdb_off_t next_ftable(struct tdb_context *tdb, tdb_off_t ftable); + +/* This returns space or -ve error number. */ +tdb_off_t alloc(struct tdb_context *tdb, size_t keylen, size_t datalen, + uint64_t hash, unsigned magic, bool growing); + +/* Put this record in a free list. 
*/ +enum TDB_ERROR add_free_record(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len_with_header, + enum tdb_lock_flags waitflag, + bool coalesce_ok); + +/* Set up header for a used/ftable/htable/chain record. */ +enum TDB_ERROR set_header(struct tdb_context *tdb, + struct tdb_used_record *rec, + unsigned magic, uint64_t keylen, uint64_t datalen, + uint64_t actuallen, unsigned hashlow); + +/* Used by tdb_check to verify. */ +unsigned int size_to_bucket(tdb_len_t data_len); +tdb_off_t bucket_off(tdb_off_t ftable_off, unsigned bucket); + +/* Used by tdb_summary */ +tdb_off_t dead_space(struct tdb_context *tdb, tdb_off_t off); + +/* io.c: */ +/* Initialize tdb->methods. */ +void tdb_io_init(struct tdb_context *tdb); + +/* Convert endian of the buffer if required. */ +void *tdb_convert(const struct tdb_context *tdb, void *buf, tdb_len_t size); + +/* Unmap and try to map the tdb. */ +void tdb_munmap(struct tdb_file *file); +void tdb_mmap(struct tdb_context *tdb); + +/* Either alloc a copy, or give direct access. Release frees or noop. */ +const void *tdb_access_read(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len, bool convert); +void *tdb_access_write(struct tdb_context *tdb, + tdb_off_t off, tdb_len_t len, bool convert); + +/* Release result of tdb_access_read/write. */ +void tdb_access_release(struct tdb_context *tdb, const void *p); +/* Commit result of tdb_acces_write. */ +enum TDB_ERROR tdb_access_commit(struct tdb_context *tdb, void *p); + +/* Convenience routine to get an offset. */ +tdb_off_t tdb_read_off(struct tdb_context *tdb, tdb_off_t off); + +/* Write an offset at an offset. */ +enum TDB_ERROR tdb_write_off(struct tdb_context *tdb, tdb_off_t off, + tdb_off_t val); + +/* Clear an ondisk area. */ +enum TDB_ERROR zero_out(struct tdb_context *tdb, tdb_off_t off, tdb_len_t len); + +/* Return a non-zero offset between >= start < end in this array (or end). 
*/ +tdb_off_t tdb_find_nonzero_off(struct tdb_context *tdb, + tdb_off_t base, + uint64_t start, + uint64_t end); + +/* Return a zero offset in this array, or num. */ +tdb_off_t tdb_find_zero_off(struct tdb_context *tdb, tdb_off_t off, + uint64_t num); + +/* Allocate and make a copy of some offset. */ +void *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len); + +/* Writes a converted copy of a record. */ +enum TDB_ERROR tdb_write_convert(struct tdb_context *tdb, tdb_off_t off, + const void *rec, size_t len); + +/* Reads record and converts it */ +enum TDB_ERROR tdb_read_convert(struct tdb_context *tdb, tdb_off_t off, + void *rec, size_t len); + +/* Bump the seqnum (caller checks for tdb->flags & TDB_SEQNUM) */ +void tdb_inc_seqnum(struct tdb_context *tdb); + +/* lock.c: */ +/* Lock/unlock a range of hashes. */ +enum TDB_ERROR tdb_lock_hashes(struct tdb_context *tdb, + tdb_off_t hash_lock, tdb_len_t hash_range, + int ltype, enum tdb_lock_flags waitflag); +enum TDB_ERROR tdb_unlock_hashes(struct tdb_context *tdb, + tdb_off_t hash_lock, + tdb_len_t hash_range, int ltype); + +/* For closing the file. */ +void tdb_lock_cleanup(struct tdb_context *tdb); + +/* Lock/unlock a particular free bucket. */ +enum TDB_ERROR tdb_lock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off, + enum tdb_lock_flags waitflag); +void tdb_unlock_free_bucket(struct tdb_context *tdb, tdb_off_t b_off); + +/* Serialize transaction start. */ +enum TDB_ERROR tdb_transaction_lock(struct tdb_context *tdb, int ltype); +void tdb_transaction_unlock(struct tdb_context *tdb, int ltype); + +/* Do we have any hash locks (ie. via tdb_chainlock) ? */ +bool tdb_has_hash_locks(struct tdb_context *tdb); + +/* Lock entire database. 
*/ +enum TDB_ERROR tdb_allrecord_lock(struct tdb_context *tdb, int ltype, + enum tdb_lock_flags flags, bool upgradable); +void tdb_allrecord_unlock(struct tdb_context *tdb, int ltype); +enum TDB_ERROR tdb_allrecord_upgrade(struct tdb_context *tdb); + +/* Serialize db open. */ +enum TDB_ERROR tdb_lock_open(struct tdb_context *tdb, + int ltype, enum tdb_lock_flags flags); +void tdb_unlock_open(struct tdb_context *tdb, int ltype); +bool tdb_has_open_lock(struct tdb_context *tdb); + +/* Serialize db expand. */ +enum TDB_ERROR tdb_lock_expand(struct tdb_context *tdb, int ltype); +void tdb_unlock_expand(struct tdb_context *tdb, int ltype); +bool tdb_has_expansion_lock(struct tdb_context *tdb); + +/* If it needs recovery, grab all the locks and do it. */ +enum TDB_ERROR tdb_lock_and_recover(struct tdb_context *tdb); + +/* Default lock and unlock functions. */ +int tdb_fcntl_lock(int fd, int rw, off_t off, off_t len, bool waitflag, void *); +int tdb_fcntl_unlock(int fd, int rw, off_t off, off_t len, void *); + +/* transaction.c: */ +enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb); +tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb); + +/* tdb.c: */ +enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb, + enum TDB_ERROR ecode, + enum tdb_log_level level, + const char *fmt, ...); + +#ifdef TDB_TRACE +void tdb_trace(struct tdb_context *tdb, const char *op); +void tdb_trace_seqnum(struct tdb_context *tdb, uint32_t seqnum, const char *op); +void tdb_trace_open(struct tdb_context *tdb, const char *op, + unsigned hash_size, unsigned tdb_flags, unsigned open_flags); +void tdb_trace_ret(struct tdb_context *tdb, const char *op, int ret); +void tdb_trace_retrec(struct tdb_context *tdb, const char *op, TDB_DATA ret); +void tdb_trace_1rec(struct tdb_context *tdb, const char *op, + TDB_DATA rec); +void tdb_trace_1rec_ret(struct tdb_context *tdb, const char *op, + TDB_DATA rec, int ret); +void tdb_trace_1rec_retrec(struct tdb_context *tdb, const char *op, + 
TDB_DATA rec, TDB_DATA ret); +void tdb_trace_2rec_flag_ret(struct tdb_context *tdb, const char *op, + TDB_DATA rec1, TDB_DATA rec2, unsigned flag, + int ret); +void tdb_trace_2rec_retrec(struct tdb_context *tdb, const char *op, + TDB_DATA rec1, TDB_DATA rec2, TDB_DATA ret); +#else +#define tdb_trace(tdb, op) +#define tdb_trace_seqnum(tdb, seqnum, op) +#define tdb_trace_open(tdb, op, hash_size, tdb_flags, open_flags) +#define tdb_trace_ret(tdb, op, ret) +#define tdb_trace_retrec(tdb, op, ret) +#define tdb_trace_1rec(tdb, op, rec) +#define tdb_trace_1rec_ret(tdb, op, rec, ret) +#define tdb_trace_1rec_retrec(tdb, op, rec, ret) +#define tdb_trace_2rec_flag_ret(tdb, op, rec1, rec2, flag, ret) +#define tdb_trace_2rec_retrec(tdb, op, rec1, rec2, ret) +#endif /* !TDB_TRACE */ + +#endif diff --git a/lib/tdb2/pytdb.c b/lib/tdb2/pytdb.c new file mode 100644 index 0000000000..c760045508 --- /dev/null +++ b/lib/tdb2/pytdb.c @@ -0,0 +1,586 @@ +/* + Unix SMB/CIFS implementation. + + Python interface to tdb2. Simply modified from tdb1 version. + + Copyright (C) 2004-2006 Tim Potter <tpot@samba.org> + Copyright (C) 2007-2008 Jelmer Vernooij <jelmer@samba.org> + Copyright (C) 2011 Rusty Russell <rusty@rustcorp.com.au> + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include <Python.h> +#include "replace.h" +#include "system/filesys.h" + +#ifndef Py_RETURN_NONE +#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None +#endif + +/* Include tdb headers */ +#include <tdb2.h> + +typedef struct { + PyObject_HEAD + struct tdb_context *ctx; + bool closed; +} PyTdbObject; + +staticforward PyTypeObject PyTdb; + +static void PyErr_SetTDBError(enum TDB_ERROR e) +{ + PyErr_SetObject(PyExc_RuntimeError, + Py_BuildValue("(i,s)", e, tdb_errorstr(e))); +} + +static TDB_DATA PyString_AsTDB_DATA(PyObject *data) +{ + TDB_DATA ret; + ret.dptr = (unsigned char *)PyString_AsString(data); + ret.dsize = PyString_Size(data); + return ret; +} + +static PyObject *PyString_FromTDB_DATA(TDB_DATA data) +{ + PyObject *ret = PyString_FromStringAndSize((const char *)data.dptr, + data.dsize); + free(data.dptr); + return ret; +} + +#define PyErr_TDB_ERROR_IS_ERR_RAISE(ret) \ + if (ret != TDB_SUCCESS) { \ + PyErr_SetTDBError(ret); \ + return NULL; \ + } + +static void stderr_log(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, + void *data) +{ + fprintf(stderr, "%s:%s\n", tdb_name(tdb), message); +} + +static PyObject *py_tdb_open(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + char *name = NULL; + int tdb_flags = TDB_DEFAULT, flags = O_RDWR, mode = 0600; + struct tdb_context *ctx; + PyTdbObject *ret; + union tdb_attribute logattr; + const char *kwnames[] = { "name", "tdb_flags", "flags", "mode", NULL }; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|siii", (char **)kwnames, &name, &tdb_flags, &flags, &mode)) + return NULL; + + if (name == NULL) { + tdb_flags |= TDB_INTERNAL; + } + + logattr.log.base.attr = TDB_ATTRIBUTE_LOG; + logattr.log.base.next = NULL; + logattr.log.fn = stderr_log; + ctx = tdb_open(name, tdb_flags, flags, mode, &logattr); + if (ctx == NULL) { 
+ PyErr_SetFromErrno(PyExc_IOError); + return NULL; + } + + ret = PyObject_New(PyTdbObject, &PyTdb); + if (!ret) { + tdb_close(ctx); + return NULL; + } + + ret->ctx = ctx; + ret->closed = false; + return (PyObject *)ret; +} + +static PyObject *obj_transaction_cancel(PyTdbObject *self) +{ + tdb_transaction_cancel(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_commit(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_transaction_commit(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_prepare_commit(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_transaction_prepare_commit(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_transaction_start(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_transaction_start(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_lockall(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_lockall(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_unlockall(PyTdbObject *self) +{ + tdb_unlockall(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_lockall_read(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_lockall_read(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_unlockall_read(PyTdbObject *self) +{ + tdb_unlockall_read(self->ctx); + Py_RETURN_NONE; +} + +static PyObject *obj_close(PyTdbObject *self) +{ + enum TDB_ERROR ret; + if (self->closed) + Py_RETURN_NONE; + ret = tdb_close(self->ctx); + self->closed = true; + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_get(PyTdbObject *self, PyObject *args) +{ + TDB_DATA key, data; + PyObject *py_key; + enum TDB_ERROR ret; + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsTDB_DATA(py_key); + ret = tdb_fetch(self->ctx, key, &data); + if (ret == TDB_ERR_NOEXIST) + Py_RETURN_NONE; + 
PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + return PyString_FromTDB_DATA(data); +} + +static PyObject *obj_append(PyTdbObject *self, PyObject *args) +{ + TDB_DATA key, data; + PyObject *py_key, *py_data; + enum TDB_ERROR ret; + if (!PyArg_ParseTuple(args, "OO", &py_key, &py_data)) + return NULL; + + key = PyString_AsTDB_DATA(py_key); + data = PyString_AsTDB_DATA(py_data); + + ret = tdb_append(self->ctx, key, data); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_firstkey(PyTdbObject *self) +{ + enum TDB_ERROR ret; + TDB_DATA key; + + ret = tdb_firstkey(self->ctx, &key); + if (ret == TDB_ERR_NOEXIST) + Py_RETURN_NONE; + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + + return PyString_FromTDB_DATA(key); +} + +static PyObject *obj_nextkey(PyTdbObject *self, PyObject *args) +{ + TDB_DATA key; + PyObject *py_key; + enum TDB_ERROR ret; + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + /* Malloc here, since tdb_nextkey frees. */ + key.dsize = PyString_Size(py_key); + key.dptr = malloc(key.dsize); + memcpy(key.dptr, PyString_AsString(py_key), key.dsize); + + ret = tdb_nextkey(self->ctx, &key); + if (ret == TDB_ERR_NOEXIST) + Py_RETURN_NONE; + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + + return PyString_FromTDB_DATA(key); +} + +static PyObject *obj_delete(PyTdbObject *self, PyObject *args) +{ + TDB_DATA key; + PyObject *py_key; + enum TDB_ERROR ret; + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsTDB_DATA(py_key); + ret = tdb_delete(self->ctx, key); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_has_key(PyTdbObject *self, PyObject *args) +{ + TDB_DATA key; + enum TDB_ERROR ret; + PyObject *py_key; + if (!PyArg_ParseTuple(args, "O", &py_key)) + return NULL; + + key = PyString_AsTDB_DATA(py_key); + ret = tdb_exists(self->ctx, key); + if (ret == TDB_ERR_NOEXIST) + return Py_False; + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + return Py_True; +} + +static PyObject *obj_store(PyTdbObject *self, 
PyObject *args) +{ + TDB_DATA key, value; + enum TDB_ERROR ret; + int flag = TDB_REPLACE; + PyObject *py_key, *py_value; + + if (!PyArg_ParseTuple(args, "OO|i", &py_key, &py_value, &flag)) + return NULL; + + key = PyString_AsTDB_DATA(py_key); + value = PyString_AsTDB_DATA(py_value); + + ret = tdb_store(self->ctx, key, value, flag); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_add_flag(PyTdbObject *self, PyObject *args) +{ + unsigned flag; + + if (!PyArg_ParseTuple(args, "I", &flag)) + return NULL; + + tdb_add_flag(self->ctx, flag); + Py_RETURN_NONE; +} + +static PyObject *obj_remove_flag(PyTdbObject *self, PyObject *args) +{ + unsigned flag; + + if (!PyArg_ParseTuple(args, "I", &flag)) + return NULL; + + tdb_remove_flag(self->ctx, flag); + Py_RETURN_NONE; +} + +typedef struct { + PyObject_HEAD + TDB_DATA current; + bool end; + PyTdbObject *iteratee; +} PyTdbIteratorObject; + +static PyObject *tdb_iter_next(PyTdbIteratorObject *self) +{ + enum TDB_ERROR e; + PyObject *ret; + if (self->end) + return NULL; + ret = PyString_FromStringAndSize((const char *)self->current.dptr, + self->current.dsize); + e = tdb_nextkey(self->iteratee->ctx, &self->current); + if (e == TDB_ERR_NOEXIST) + self->end = true; + else + PyErr_TDB_ERROR_IS_ERR_RAISE(e); + return ret; +} + +static void tdb_iter_dealloc(PyTdbIteratorObject *self) +{ + Py_DECREF(self->iteratee); + PyObject_Del(self); +} + +PyTypeObject PyTdbIterator = { + .tp_name = "Iterator", + .tp_basicsize = sizeof(PyTdbIteratorObject), + .tp_iternext = (iternextfunc)tdb_iter_next, + .tp_dealloc = (destructor)tdb_iter_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, +}; + +static PyObject *tdb_object_iter(PyTdbObject *self) +{ + PyTdbIteratorObject *ret; + enum TDB_ERROR e; + + ret = PyObject_New(PyTdbIteratorObject, &PyTdbIterator); + if (!ret) + return NULL; + e = tdb_firstkey(self->ctx, &ret->current); + if (e == TDB_ERR_NOEXIST) { + ret->end = true; + } else { + 
PyErr_TDB_ERROR_IS_ERR_RAISE(e); + ret->end = false; + } + ret->iteratee = self; + Py_INCREF(self); + return (PyObject *)ret; +} + +static PyObject *obj_clear(PyTdbObject *self) +{ + enum TDB_ERROR ret = tdb_wipe_all(self->ctx); + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + Py_RETURN_NONE; +} + +static PyObject *obj_enable_seqnum(PyTdbObject *self) +{ + tdb_add_flag(self->ctx, TDB_SEQNUM); + Py_RETURN_NONE; +} + +static PyMethodDef tdb_object_methods[] = { + { "transaction_cancel", (PyCFunction)obj_transaction_cancel, METH_NOARGS, + "S.transaction_cancel() -> None\n" + "Cancel the currently active transaction." }, + { "transaction_commit", (PyCFunction)obj_transaction_commit, METH_NOARGS, + "S.transaction_commit() -> None\n" + "Commit the currently active transaction." }, + { "transaction_prepare_commit", (PyCFunction)obj_transaction_prepare_commit, METH_NOARGS, + "S.transaction_prepare_commit() -> None\n" + "Prepare to commit the currently active transaction" }, + { "transaction_start", (PyCFunction)obj_transaction_start, METH_NOARGS, + "S.transaction_start() -> None\n" + "Start a new transaction." }, + { "lock_all", (PyCFunction)obj_lockall, METH_NOARGS, NULL }, + { "unlock_all", (PyCFunction)obj_unlockall, METH_NOARGS, NULL }, + { "read_lock_all", (PyCFunction)obj_lockall_read, METH_NOARGS, NULL }, + { "read_unlock_all", (PyCFunction)obj_unlockall_read, METH_NOARGS, NULL }, + { "close", (PyCFunction)obj_close, METH_NOARGS, NULL }, + { "get", (PyCFunction)obj_get, METH_VARARGS, "S.get(key) -> value\n" + "Fetch a value." }, + { "append", (PyCFunction)obj_append, METH_VARARGS, "S.append(key, value) -> None\n" + "Append data to an existing key." }, + { "firstkey", (PyCFunction)obj_firstkey, METH_NOARGS, "S.firstkey() -> data\n" + "Return the first key in this database." }, + { "nextkey", (PyCFunction)obj_nextkey, METH_NOARGS, "S.nextkey(key) -> data\n" + "Return the next key in this database." 
}, + { "delete", (PyCFunction)obj_delete, METH_VARARGS, "S.delete(key) -> None\n" + "Delete an entry." }, + { "has_key", (PyCFunction)obj_has_key, METH_VARARGS, "S.has_key(key) -> None\n" + "Check whether key exists in this database." }, + { "store", (PyCFunction)obj_store, METH_VARARGS, "S.store(key, data, flag=REPLACE) -> None" + "Store data." }, + { "add_flag", (PyCFunction)obj_add_flag, METH_VARARGS, "S.add_flag(flag) -> None" }, + { "remove_flag", (PyCFunction)obj_remove_flag, METH_VARARGS, "S.remove_flag(flag) -> None" }, + { "iterkeys", (PyCFunction)tdb_object_iter, METH_NOARGS, "S.iterkeys() -> iterator" }, + { "clear", (PyCFunction)obj_clear, METH_NOARGS, "S.clear() -> None\n" + "Wipe the entire database." }, + { "enable_seqnum", (PyCFunction)obj_enable_seqnum, METH_NOARGS, + "S.enable_seqnum() -> None" }, + { NULL } +}; + +static PyObject *obj_get_flags(PyTdbObject *self, void *closure) +{ + return PyInt_FromLong(tdb_get_flags(self->ctx)); +} + +static PyObject *obj_get_filename(PyTdbObject *self, void *closure) +{ + return PyString_FromString(tdb_name(self->ctx)); +} + +static PyObject *obj_get_seqnum(PyTdbObject *self, void *closure) +{ + return PyInt_FromLong(tdb_get_seqnum(self->ctx)); +} + + +static PyGetSetDef tdb_object_getsetters[] = { + { (char *)"flags", (getter)obj_get_flags, NULL, NULL }, + { (char *)"filename", (getter)obj_get_filename, NULL, (char *)"The filename of this TDB file."}, + { (char *)"seqnum", (getter)obj_get_seqnum, NULL, NULL }, + { NULL } +}; + +static PyObject *tdb_object_repr(PyTdbObject *self) +{ + if (tdb_get_flags(self->ctx) & TDB_INTERNAL) { + return PyString_FromString("Tdb(<internal>)"); + } else { + return PyString_FromFormat("Tdb('%s')", tdb_name(self->ctx)); + } +} + +static void tdb_object_dealloc(PyTdbObject *self) +{ + if (!self->closed) + tdb_close(self->ctx); + self->ob_type->tp_free(self); +} + +static PyObject *obj_getitem(PyTdbObject *self, PyObject *key) +{ + TDB_DATA tkey, val; + enum TDB_ERROR ret; + + if 
(!PyString_Check(key)) { + PyErr_SetString(PyExc_TypeError, "Expected string as key"); + return NULL; + } + + tkey.dptr = (unsigned char *)PyString_AsString(key); + tkey.dsize = PyString_Size(key); + + ret = tdb_fetch(self->ctx, tkey, &val); + if (ret == TDB_ERR_NOEXIST) { + PyErr_SetString(PyExc_KeyError, "No such TDB entry"); + return NULL; + } else { + PyErr_TDB_ERROR_IS_ERR_RAISE(ret); + return PyString_FromTDB_DATA(val); + } +} + +static int obj_setitem(PyTdbObject *self, PyObject *key, PyObject *value) +{ + TDB_DATA tkey, tval; + enum TDB_ERROR ret; + if (!PyString_Check(key)) { + PyErr_SetString(PyExc_TypeError, "Expected string as key"); + return -1; + } + + tkey = PyString_AsTDB_DATA(key); + + if (value == NULL) { + ret = tdb_delete(self->ctx, tkey); + } else { + if (!PyString_Check(value)) { + PyErr_SetString(PyExc_TypeError, "Expected string as value"); + return -1; + } + + tval = PyString_AsTDB_DATA(value); + + ret = tdb_store(self->ctx, tkey, tval, TDB_REPLACE); + } + + if (ret != TDB_SUCCESS) { + PyErr_SetTDBError(ret); + return -1; + } + + return ret; +} + +static PyMappingMethods tdb_object_mapping = { + .mp_subscript = (binaryfunc)obj_getitem, + .mp_ass_subscript = (objobjargproc)obj_setitem, +}; +static PyTypeObject PyTdb = { + .tp_name = "Tdb", + .tp_basicsize = sizeof(PyTdbObject), + .tp_methods = tdb_object_methods, + .tp_getset = tdb_object_getsetters, + .tp_new = py_tdb_open, + .tp_doc = "A TDB file", + .tp_repr = (reprfunc)tdb_object_repr, + .tp_dealloc = (destructor)tdb_object_dealloc, + .tp_as_mapping = &tdb_object_mapping, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_ITER, + .tp_iter = (getiterfunc)tdb_object_iter, +}; + +static PyMethodDef tdb_methods[] = { + { "open", (PyCFunction)py_tdb_open, METH_VARARGS|METH_KEYWORDS, "open(name, hash_size=0, tdb_flags=TDB_DEFAULT, flags=O_RDWR, mode=0600)\n" + "Open a TDB file." 
}, + { NULL } +}; + +void inittdb(void); +void inittdb(void) +{ + PyObject *m; + + if (PyType_Ready(&PyTdb) < 0) + return; + + if (PyType_Ready(&PyTdbIterator) < 0) + return; + + m = Py_InitModule3("tdb", tdb_methods, "TDB is a simple key-value database similar to GDBM that supports multiple writers."); + if (m == NULL) + return; + + PyModule_AddObject(m, "REPLACE", PyInt_FromLong(TDB_REPLACE)); + PyModule_AddObject(m, "INSERT", PyInt_FromLong(TDB_INSERT)); + PyModule_AddObject(m, "MODIFY", PyInt_FromLong(TDB_MODIFY)); + + PyModule_AddObject(m, "DEFAULT", PyInt_FromLong(TDB_DEFAULT)); + PyModule_AddObject(m, "INTERNAL", PyInt_FromLong(TDB_INTERNAL)); + PyModule_AddObject(m, "NOLOCK", PyInt_FromLong(TDB_NOLOCK)); + PyModule_AddObject(m, "NOMMAP", PyInt_FromLong(TDB_NOMMAP)); + PyModule_AddObject(m, "CONVERT", PyInt_FromLong(TDB_CONVERT)); + PyModule_AddObject(m, "NOSYNC", PyInt_FromLong(TDB_NOSYNC)); + PyModule_AddObject(m, "SEQNUM", PyInt_FromLong(TDB_SEQNUM)); + PyModule_AddObject(m, "ALLOW_NESTING", PyInt_FromLong(TDB_ALLOW_NESTING)); + + PyModule_AddObject(m, "__docformat__", PyString_FromString("restructuredText")); + + PyModule_AddObject(m, "__version__", PyString_FromString(PACKAGE_VERSION)); + + Py_INCREF(&PyTdb); + PyModule_AddObject(m, "Tdb", (PyObject *)&PyTdb); + + Py_INCREF(&PyTdbIterator); +} diff --git a/lib/tdb2/summary.c b/lib/tdb2/summary.c new file mode 100644 index 0000000000..26cdd3e4fe --- /dev/null +++ b/lib/tdb2/summary.c @@ -0,0 +1,282 @@ + /* + Trivial Database 2: human-readable summary code + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#include "private.h" +#include <assert.h> +#include <ccan/tally/tally.h> + +static tdb_off_t count_hash(struct tdb_context *tdb, + tdb_off_t hash_off, unsigned bits) +{ + const tdb_off_t *h; + tdb_off_t count = 0; + unsigned int i; + + h = tdb_access_read(tdb, hash_off, sizeof(*h) << bits, true); + if (TDB_PTR_IS_ERR(h)) { + return TDB_PTR_ERR(h); + } + for (i = 0; i < (1 << bits); i++) + count += (h[i] != 0); + + tdb_access_release(tdb, h); + return count; +} + +static enum TDB_ERROR summarize(struct tdb_context *tdb, + struct tally *hashes, + struct tally *ftables, + struct tally *fr, + struct tally *keys, + struct tally *data, + struct tally *extra, + struct tally *uncoal, + struct tally *chains) +{ + tdb_off_t off; + tdb_len_t len; + tdb_len_t unc = 0; + + for (off = sizeof(struct tdb_header); + off < tdb->file->map_size; + off += len) { + const union { + struct tdb_used_record u; + struct tdb_free_record f; + struct tdb_recovery_record r; + } *p; + /* We might not be able to get the whole thing. 
*/ + p = tdb_access_read(tdb, off, sizeof(p->f), true); + if (TDB_PTR_IS_ERR(p)) { + return TDB_PTR_ERR(p); + } + if (frec_magic(&p->f) != TDB_FREE_MAGIC) { + if (unc > 1) { + tally_add(uncoal, unc); + unc = 0; + } + } + + if (p->r.magic == TDB_RECOVERY_INVALID_MAGIC + || p->r.magic == TDB_RECOVERY_MAGIC) { + len = sizeof(p->r) + p->r.max_len; + } else if (frec_magic(&p->f) == TDB_FREE_MAGIC) { + len = frec_len(&p->f); + tally_add(fr, len); + len += sizeof(p->u); + unc++; + } else if (rec_magic(&p->u) == TDB_USED_MAGIC) { + len = sizeof(p->u) + + rec_key_length(&p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + + tally_add(keys, rec_key_length(&p->u)); + tally_add(data, rec_data_length(&p->u)); + tally_add(extra, rec_extra_padding(&p->u)); + } else if (rec_magic(&p->u) == TDB_HTABLE_MAGIC) { + tdb_off_t count = count_hash(tdb, + off + sizeof(p->u), + TDB_SUBLEVEL_HASH_BITS); + if (TDB_OFF_IS_ERR(count)) { + return count; + } + tally_add(hashes, count); + tally_add(extra, rec_extra_padding(&p->u)); + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + } else if (rec_magic(&p->u) == TDB_FTABLE_MAGIC) { + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + tally_add(ftables, rec_data_length(&p->u)); + tally_add(extra, rec_extra_padding(&p->u)); + } else if (rec_magic(&p->u) == TDB_CHAIN_MAGIC) { + len = sizeof(p->u) + + rec_data_length(&p->u) + + rec_extra_padding(&p->u); + tally_add(chains, 1); + tally_add(extra, rec_extra_padding(&p->u)); + } else { + len = dead_space(tdb, off); + if (TDB_OFF_IS_ERR(len)) { + return len; + } + } + tdb_access_release(tdb, p); + } + if (unc) + tally_add(uncoal, unc); + return TDB_SUCCESS; +} + +#define SUMMARY_FORMAT \ + "Size of file/data: %zu/%zu\n" \ + "Number of records: %zu\n" \ + "Smallest/average/largest keys: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest data: %zu/%zu/%zu\n%s" \ + "Smallest/average/largest padding: %zu/%zu/%zu\n%s" \ + "Number of free records: 
%zu\n" \ + "Smallest/average/largest free records: %zu/%zu/%zu\n%s" \ + "Number of uncoalesced records: %zu\n" \ + "Smallest/average/largest uncoalesced runs: %zu/%zu/%zu\n%s" \ + "Toplevel hash used: %u of %u\n" \ + "Number of chains: %zu\n" \ + "Number of subhashes: %zu\n" \ + "Smallest/average/largest subhash entries: %zu/%zu/%zu\n%s" \ + "Percentage keys/data/padding/free/rechdrs/freehdrs/hashes: %.0f/%.0f/%.0f/%.0f/%.0f/%.0f/%.0f\n" + +#define BUCKET_SUMMARY_FORMAT_A \ + "Free bucket %zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" +#define BUCKET_SUMMARY_FORMAT_B \ + "Free bucket %zu-%zu: total entries %zu.\n" \ + "Smallest/average/largest length: %zu/%zu/%zu\n%s" + +#define HISTO_WIDTH 70 +#define HISTO_HEIGHT 20 + +enum TDB_ERROR tdb_summary(struct tdb_context *tdb, + enum tdb_summary_flags flags, + char **summary) +{ + tdb_len_t len; + struct tally *ftables, *hashes, *freet, *keys, *data, *extra, *uncoal, + *chains; + char *hashesg, *freeg, *keysg, *datag, *extrag, *uncoalg; + enum TDB_ERROR ecode; + + hashesg = freeg = keysg = datag = extrag = uncoalg = NULL; + + ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, false); + if (ecode != TDB_SUCCESS) { + return tdb->last_error = ecode; + } + + ecode = tdb_lock_expand(tdb, F_RDLCK); + if (ecode != TDB_SUCCESS) { + tdb_allrecord_unlock(tdb, F_RDLCK); + return tdb->last_error = ecode; + } + + /* Start stats off empty. 
*/ + ftables = tally_new(HISTO_HEIGHT); + hashes = tally_new(HISTO_HEIGHT); + freet = tally_new(HISTO_HEIGHT); + keys = tally_new(HISTO_HEIGHT); + data = tally_new(HISTO_HEIGHT); + extra = tally_new(HISTO_HEIGHT); + uncoal = tally_new(HISTO_HEIGHT); + chains = tally_new(HISTO_HEIGHT); + if (!ftables || !hashes || !freet || !keys || !data || !extra + || !uncoal || !chains) { + ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_summary: failed to allocate" + " tally structures"); + goto unlock; + } + + ecode = summarize(tdb, hashes, ftables, freet, keys, data, extra, + uncoal, chains); + if (ecode != TDB_SUCCESS) { + goto unlock; + } + + if (flags & TDB_SUMMARY_HISTOGRAMS) { + hashesg = tally_histogram(hashes, HISTO_WIDTH, HISTO_HEIGHT); + freeg = tally_histogram(freet, HISTO_WIDTH, HISTO_HEIGHT); + keysg = tally_histogram(keys, HISTO_WIDTH, HISTO_HEIGHT); + datag = tally_histogram(data, HISTO_WIDTH, HISTO_HEIGHT); + extrag = tally_histogram(extra, HISTO_WIDTH, HISTO_HEIGHT); + uncoalg = tally_histogram(uncoal, HISTO_WIDTH, HISTO_HEIGHT); + } + + /* 20 is max length of a %llu. */ + len = strlen(SUMMARY_FORMAT) + 33*20 + 1 + + (hashesg ? strlen(hashesg) : 0) + + (freeg ? strlen(freeg) : 0) + + (keysg ? strlen(keysg) : 0) + + (datag ? strlen(datag) : 0) + + (extrag ? strlen(extrag) : 0) + + (uncoalg ? strlen(uncoalg) : 0); + + *summary = malloc(len); + if (!*summary) { + ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_summary: failed to allocate string"); + goto unlock; + } + + sprintf(*summary, SUMMARY_FORMAT, + (size_t)tdb->file->map_size, + tally_total(keys, NULL) + tally_total(data, NULL), + tally_num(keys), + tally_min(keys), tally_mean(keys), tally_max(keys), + keysg ? keysg : "", + tally_min(data), tally_mean(data), tally_max(data), + datag ? datag : "", + tally_min(extra), tally_mean(extra), tally_max(extra), + extrag ? extrag : "", + tally_num(freet), + tally_min(freet), tally_mean(freet), tally_max(freet), + freeg ? 
freeg : "", + tally_total(uncoal, NULL), + tally_min(uncoal), tally_mean(uncoal), tally_max(uncoal), + uncoalg ? uncoalg : "", + (unsigned)count_hash(tdb, offsetof(struct tdb_header, + hashtable), + TDB_TOPLEVEL_HASH_BITS), + 1 << TDB_TOPLEVEL_HASH_BITS, + tally_num(chains), + tally_num(hashes), + tally_min(hashes), tally_mean(hashes), tally_max(hashes), + hashesg ? hashesg : "", + tally_total(keys, NULL) * 100.0 / tdb->file->map_size, + tally_total(data, NULL) * 100.0 / tdb->file->map_size, + tally_total(extra, NULL) * 100.0 / tdb->file->map_size, + tally_total(freet, NULL) * 100.0 / tdb->file->map_size, + (tally_num(keys) + tally_num(freet) + tally_num(hashes)) + * sizeof(struct tdb_used_record) * 100.0 / tdb->file->map_size, + tally_num(ftables) * sizeof(struct tdb_freetable) + * 100.0 / tdb->file->map_size, + (tally_num(hashes) + * (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) + + (sizeof(tdb_off_t) << TDB_TOPLEVEL_HASH_BITS) + + sizeof(struct tdb_chain) * tally_num(chains)) + * 100.0 / tdb->file->map_size); + +unlock: + free(hashesg); + free(freeg); + free(keysg); + free(datag); + free(extrag); + free(uncoalg); + free(hashes); + free(freet); + free(keys); + free(data); + free(extra); + free(uncoal); + free(ftables); + free(chains); + + tdb_allrecord_unlock(tdb, F_RDLCK); + tdb_unlock_expand(tdb, F_RDLCK); + return tdb->last_error = ecode; +} diff --git a/lib/tdb2/tdb.c b/lib/tdb2/tdb.c new file mode 100644 index 0000000000..753ccb0c8b --- /dev/null +++ b/lib/tdb2/tdb.c @@ -0,0 +1,486 @@ +#include "private.h" +#ifndef _SAMBA_BUILD_ +#include <ccan/asprintf/asprintf.h> +#include <stdarg.h> +#endif + +static enum TDB_ERROR update_rec_hdr(struct tdb_context *tdb, + tdb_off_t off, + tdb_len_t keylen, + tdb_len_t datalen, + struct tdb_used_record *rec, + uint64_t h) +{ + uint64_t dataroom = rec_data_length(rec) + rec_extra_padding(rec); + enum TDB_ERROR ecode; + + ecode = set_header(tdb, rec, TDB_USED_MAGIC, keylen, datalen, + keylen + dataroom, h); + if (ecode == 
TDB_SUCCESS) { + ecode = tdb_write_convert(tdb, off, rec, sizeof(*rec)); + } + return ecode; +} + +static enum TDB_ERROR replace_data(struct tdb_context *tdb, + struct hash_info *h, + struct tdb_data key, struct tdb_data dbuf, + tdb_off_t old_off, tdb_len_t old_room, + bool growing) +{ + tdb_off_t new_off; + enum TDB_ERROR ecode; + + /* Allocate a new record. */ + new_off = alloc(tdb, key.dsize, dbuf.dsize, h->h, TDB_USED_MAGIC, + growing); + if (TDB_OFF_IS_ERR(new_off)) { + return new_off; + } + + /* We didn't like the existing one: remove it. */ + if (old_off) { + tdb->stats.frees++; + ecode = add_free_record(tdb, old_off, + sizeof(struct tdb_used_record) + + key.dsize + old_room, + TDB_LOCK_WAIT, true); + if (ecode == TDB_SUCCESS) + ecode = replace_in_hash(tdb, h, new_off); + } else { + ecode = add_to_hash(tdb, h, new_off); + } + if (ecode != TDB_SUCCESS) { + return ecode; + } + + new_off += sizeof(struct tdb_used_record); + ecode = tdb->methods->twrite(tdb, new_off, key.dptr, key.dsize); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + new_off += key.dsize; + ecode = tdb->methods->twrite(tdb, new_off, dbuf.dptr, dbuf.dsize); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + if (tdb->flags & TDB_SEQNUM) + tdb_inc_seqnum(tdb); + + return TDB_SUCCESS; +} + +static enum TDB_ERROR update_data(struct tdb_context *tdb, + tdb_off_t off, + struct tdb_data dbuf, + tdb_len_t extra) +{ + enum TDB_ERROR ecode; + + ecode = tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize); + if (ecode == TDB_SUCCESS && extra) { + /* Put a zero in; future versions may append other data. 
*/ + ecode = tdb->methods->twrite(tdb, off + dbuf.dsize, "", 1); + } + if (tdb->flags & TDB_SEQNUM) + tdb_inc_seqnum(tdb); + + return ecode; +} + +enum TDB_ERROR tdb_store(struct tdb_context *tdb, + struct tdb_data key, struct tdb_data dbuf, int flag) +{ + struct hash_info h; + tdb_off_t off; + tdb_len_t old_room = 0; + struct tdb_used_record rec; + enum TDB_ERROR ecode; + + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + return tdb->last_error = off; + } + + /* Now we have lock on this hash bucket. */ + if (flag == TDB_INSERT) { + if (off) { + ecode = TDB_ERR_EXISTS; + goto out; + } + } else { + if (off) { + old_room = rec_data_length(&rec) + + rec_extra_padding(&rec); + if (old_room >= dbuf.dsize) { + /* Can modify in-place. Easy! */ + ecode = update_rec_hdr(tdb, off, + key.dsize, dbuf.dsize, + &rec, h.h); + if (ecode != TDB_SUCCESS) { + goto out; + } + ecode = update_data(tdb, + off + sizeof(rec) + + key.dsize, dbuf, + old_room - dbuf.dsize); + if (ecode != TDB_SUCCESS) { + goto out; + } + tdb_unlock_hashes(tdb, h.hlock_start, + h.hlock_range, F_WRLCK); + return tdb->last_error = TDB_SUCCESS; + } + } else { + if (flag == TDB_MODIFY) { + /* if the record doesn't exist and we + are in TDB_MODIFY mode then we should fail + the store */ + ecode = TDB_ERR_NOEXIST; + goto out; + } + } + } + + /* If we didn't use the old record, this implies we're growing. 
*/ + ecode = replace_data(tdb, &h, key, dbuf, off, old_room, off); +out: + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); + return tdb->last_error = ecode; +} + +enum TDB_ERROR tdb_append(struct tdb_context *tdb, + struct tdb_data key, struct tdb_data dbuf) +{ + struct hash_info h; + tdb_off_t off; + struct tdb_used_record rec; + tdb_len_t old_room = 0, old_dlen; + unsigned char *newdata; + struct tdb_data new_dbuf; + enum TDB_ERROR ecode; + + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + return tdb->last_error = off; + } + + if (off) { + old_dlen = rec_data_length(&rec); + old_room = old_dlen + rec_extra_padding(&rec); + + /* Fast path: can append in place. */ + if (rec_extra_padding(&rec) >= dbuf.dsize) { + ecode = update_rec_hdr(tdb, off, key.dsize, + old_dlen + dbuf.dsize, &rec, + h.h); + if (ecode != TDB_SUCCESS) { + goto out; + } + + off += sizeof(rec) + key.dsize + old_dlen; + ecode = update_data(tdb, off, dbuf, + rec_extra_padding(&rec)); + goto out; + } + + /* Slow path. */ + newdata = malloc(key.dsize + old_dlen + dbuf.dsize); + if (!newdata) { + ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_append:" + " failed to allocate %zu bytes", + (size_t)(key.dsize + old_dlen + + dbuf.dsize)); + goto out; + } + ecode = tdb->methods->tread(tdb, off + sizeof(rec) + key.dsize, + newdata, old_dlen); + if (ecode != TDB_SUCCESS) { + goto out_free_newdata; + } + memcpy(newdata + old_dlen, dbuf.dptr, dbuf.dsize); + new_dbuf.dptr = newdata; + new_dbuf.dsize = old_dlen + dbuf.dsize; + } else { + newdata = NULL; + new_dbuf = dbuf; + } + + /* If they're using tdb_append(), it implies they're growing record. 
*/ + ecode = replace_data(tdb, &h, key, new_dbuf, off, old_room, true); + +out_free_newdata: + free(newdata); +out: + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); + return tdb->last_error = ecode; +} + +enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key, + struct tdb_data *data) +{ + tdb_off_t off; + struct tdb_used_record rec; + struct hash_info h; + enum TDB_ERROR ecode; + + off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + return tdb->last_error = off; + } + + if (!off) { + ecode = TDB_ERR_NOEXIST; + } else { + data->dsize = rec_data_length(&rec); + data->dptr = tdb_alloc_read(tdb, off + sizeof(rec) + key.dsize, + data->dsize); + if (TDB_PTR_IS_ERR(data->dptr)) { + ecode = TDB_PTR_ERR(data->dptr); + } else + ecode = TDB_SUCCESS; + } + + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + return tdb->last_error = ecode; +} + +bool tdb_exists(struct tdb_context *tdb, TDB_DATA key) +{ + tdb_off_t off; + struct tdb_used_record rec; + struct hash_info h; + + off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + tdb->last_error = off; + return false; + } + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + + tdb->last_error = TDB_SUCCESS; + return off ? true : false; +} + +enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key) +{ + tdb_off_t off; + struct tdb_used_record rec; + struct hash_info h; + enum TDB_ERROR ecode; + + off = find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + return tdb->last_error = off; + } + + if (!off) { + ecode = TDB_ERR_NOEXIST; + goto unlock; + } + + ecode = delete_from_hash(tdb, &h); + if (ecode != TDB_SUCCESS) { + goto unlock; + } + + /* Free the deleted entry. 
*/ + tdb->stats.frees++; + ecode = add_free_record(tdb, off, + sizeof(struct tdb_used_record) + + rec_key_length(&rec) + + rec_data_length(&rec) + + rec_extra_padding(&rec), + TDB_LOCK_WAIT, true); + + if (tdb->flags & TDB_SEQNUM) + tdb_inc_seqnum(tdb); + +unlock: + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_WRLCK); + return tdb->last_error = ecode; +} + +unsigned int tdb_get_flags(struct tdb_context *tdb) +{ + return tdb->flags; +} + +void tdb_add_flag(struct tdb_context *tdb, unsigned flag) +{ + if (tdb->flags & TDB_INTERNAL) { + tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_add_flag: internal db"); + return; + } + switch (flag) { + case TDB_NOLOCK: + tdb->flags |= TDB_NOLOCK; + break; + case TDB_NOMMAP: + tdb->flags |= TDB_NOMMAP; + tdb_munmap(tdb->file); + break; + case TDB_NOSYNC: + tdb->flags |= TDB_NOSYNC; + break; + case TDB_SEQNUM: + tdb->flags |= TDB_SEQNUM; + break; + case TDB_ALLOW_NESTING: + tdb->flags |= TDB_ALLOW_NESTING; + break; + default: + tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_add_flag: Unknown flag %u", + flag); + } +} + +void tdb_remove_flag(struct tdb_context *tdb, unsigned flag) +{ + if (tdb->flags & TDB_INTERNAL) { + tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_remove_flag: internal db"); + return; + } + switch (flag) { + case TDB_NOLOCK: + tdb->flags &= ~TDB_NOLOCK; + break; + case TDB_NOMMAP: + tdb->flags &= ~TDB_NOMMAP; + tdb_mmap(tdb); + break; + case TDB_NOSYNC: + tdb->flags &= ~TDB_NOSYNC; + break; + case TDB_SEQNUM: + tdb->flags &= ~TDB_SEQNUM; + break; + case TDB_ALLOW_NESTING: + tdb->flags &= ~TDB_ALLOW_NESTING; + break; + default: + tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_remove_flag: Unknown flag %u", + flag); + } +} + +const char *tdb_errorstr(enum TDB_ERROR ecode) +{ + /* Gcc warns if you miss a case in the switch, so use that. 
*/ + switch (ecode) { + case TDB_SUCCESS: return "Success"; + case TDB_ERR_CORRUPT: return "Corrupt database"; + case TDB_ERR_IO: return "IO Error"; + case TDB_ERR_LOCK: return "Locking error"; + case TDB_ERR_OOM: return "Out of memory"; + case TDB_ERR_EXISTS: return "Record exists"; + case TDB_ERR_EINVAL: return "Invalid parameter"; + case TDB_ERR_NOEXIST: return "Record does not exist"; + case TDB_ERR_RDONLY: return "write not permitted"; + } + return "Invalid error code"; +} + +enum TDB_ERROR tdb_error(struct tdb_context *tdb) +{ + return tdb->last_error; +} + +enum TDB_ERROR COLD tdb_logerr(struct tdb_context *tdb, + enum TDB_ERROR ecode, + enum tdb_log_level level, + const char *fmt, ...) +{ + char *message; + va_list ap; + int len; /* vasprintf() returns int (negative on failure); an unsigned type would make the check below always false */ + /* tdb_open paths care about errno, so save it. */ + int saved_errno = errno; + + if (!tdb->log_fn) + return ecode; + + va_start(ap, fmt); + len = vasprintf(&message, fmt, ap); + va_end(ap); + + if (len < 0) { /* formatting failed: message is not valid, so log the raw format string instead */ + tdb->log_fn(tdb, TDB_LOG_ERROR, + "out of memory formatting message:", tdb->log_data); + tdb->log_fn(tdb, level, fmt, tdb->log_data); + } else { + tdb->log_fn(tdb, level, message, tdb->log_data); + free(message); + } + errno = saved_errno; + return ecode; +} + +enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb, + TDB_DATA key, + enum TDB_ERROR (*parse)(TDB_DATA k, + TDB_DATA d, + void *data), + void *data) +{ + tdb_off_t off; + struct tdb_used_record rec; + struct hash_info h; + enum TDB_ERROR ecode; + + off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) { + return tdb->last_error = off; + } + + if (!off) { + ecode = TDB_ERR_NOEXIST; + } else { + const void *dptr; + dptr = tdb_access_read(tdb, off + sizeof(rec) + key.dsize, + rec_data_length(&rec), false); + if (TDB_PTR_IS_ERR(dptr)) { + ecode = TDB_PTR_ERR(dptr); + } else { + TDB_DATA d = tdb_mkdata(dptr, rec_data_length(&rec)); + + ecode = parse(key, d, data); + tdb_access_release(tdb, dptr); + } + } + + tdb_unlock_hashes(tdb,
h.hlock_start, h.hlock_range, F_RDLCK); + return tdb->last_error = ecode; +} + +const char *tdb_name(const struct tdb_context *tdb) +{ + return tdb->name; +} + +int64_t tdb_get_seqnum(struct tdb_context *tdb) +{ + tdb_off_t off = tdb_read_off(tdb, offsetof(struct tdb_header, seqnum)); + if (TDB_OFF_IS_ERR(off)) + tdb->last_error = off; + else + tdb->last_error = TDB_SUCCESS; + return off; +} + + +int tdb_fd(const struct tdb_context *tdb) +{ + return tdb->file->fd; +} diff --git a/lib/tdb2/tdb2.h b/lib/tdb2/tdb2.h new file mode 100644 index 0000000000..3f80793d76 --- /dev/null +++ b/lib/tdb2/tdb2.h @@ -0,0 +1,848 @@ +#ifndef CCAN_TDB2_H +#define CCAN_TDB2_H + +/* + TDB version 2: trivial database library + + Copyright (C) Andrew Tridgell 1999-2004 + Copyright (C) Rusty Russell 2010-2011 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _SAMBA_BUILD_ +/* For mode_t */ +#include <sys/types.h> +/* For O_* flags. */ +#include <sys/stat.h> +/* For sig_atomic_t. 
*/ +#include <signal.h> +/* For uint64_t */ +#include <stdint.h> +/* For bool */ +#include <stdbool.h> +/* For memcmp */ +#include <string.h> +#else +#include "replace.h" +#endif +#include <ccan/compiler/compiler.h> +#include <ccan/typesafe_cb/typesafe_cb.h> +#include <ccan/cast/cast.h> + +union tdb_attribute; +struct tdb_context; + +/** + * tdb_open - open a database file + * @name: the file name (can be NULL if flags contains TDB_INTERNAL) + * @tdb_flags: options for this database + * @open_flags: flags argument for tdb's open() call. + * @mode: mode argument for tdb's open() call. + * @attributes: linked list of extra attributes for this tdb. + * + * This call opens (and potentially creates) a database file. + * Multiple processes can have the TDB file open at once. + * + * On failure it will return NULL, and set errno: it may also call + * any log attribute found in @attributes. + * + * See also: + * union tdb_attribute + */ +struct tdb_context *tdb_open(const char *name, int tdb_flags, + int open_flags, mode_t mode, + union tdb_attribute *attributes); + + +/* flags for tdb_open() */ +#define TDB_DEFAULT 0 /* just a readability place holder */ +#define TDB_INTERNAL 2 /* don't store on disk */ +#define TDB_NOLOCK 4 /* don't do any locking */ +#define TDB_NOMMAP 8 /* don't use mmap */ +#define TDB_CONVERT 16 /* convert endian */ +#define TDB_NOSYNC 64 /* don't use synchronous transactions */ +#define TDB_SEQNUM 128 /* maintain a sequence number */ +#define TDB_ALLOW_NESTING 256 /* fake nested transactions */ + +/** + * tdb_close - close and free a tdb. + * @tdb: the tdb context returned from tdb_open() + * + * This always succeeds, in that @tdb is unusable after this call. But if + * some unexpected error occurred while closing, it will return non-zero + * (the only clue as to cause will be via the log attribute). + */ +int tdb_close(struct tdb_context *tdb); + +/** + * struct tdb_data - representation of keys or values. 
+ * @dptr: the data pointer + * @dsize: the size of the data pointed to by dptr. + * + * This is the "blob" representation of keys and data used by TDB. + */ +typedef struct tdb_data { + unsigned char *dptr; + size_t dsize; +} TDB_DATA; + +/** + * enum TDB_ERROR - error returns for TDB + * + * See Also: + * tdb_errorstr() + */ +enum TDB_ERROR { + TDB_SUCCESS = 0, /* No error. */ + TDB_ERR_CORRUPT = -1, /* We read the db, and it was bogus. */ + TDB_ERR_IO = -2, /* We couldn't read/write the db. */ + TDB_ERR_LOCK = -3, /* Locking failed. */ + TDB_ERR_OOM = -4, /* Out of Memory. */ + TDB_ERR_EXISTS = -5, /* The key already exists. */ + TDB_ERR_NOEXIST = -6, /* The key does not exist. */ + TDB_ERR_EINVAL = -7, /* You're using it wrong. */ + TDB_ERR_RDONLY = -8, /* The database is read-only. */ + TDB_ERR_LAST = TDB_ERR_RDONLY +}; + +/** + * tdb_store - store a key/value pair in a tdb. + * @tdb: the tdb context returned from tdb_open() + * @key: the key + * @dbuf: the data to associate with the key. + * @flag: TDB_REPLACE, TDB_INSERT or TDB_MODIFY. + * + * This inserts (or overwrites) a key/value pair in the TDB. If flag + * is TDB_REPLACE, it doesn't matter whether the key exists or not; + * TDB_INSERT means it must not exist (returns TDB_ERR_EXISTS otherwise), + * and TDB_MODIFY means it must exist (returns TDB_ERR_NOEXIST otherwise). + * + * On success, this returns TDB_SUCCESS. + * + * See also: + * tdb_fetch, tdb_transaction_start, tdb_append, tdb_delete. + */ +enum TDB_ERROR tdb_store(struct tdb_context *tdb, + struct tdb_data key, + struct tdb_data dbuf, + int flag); + +/* flags to tdb_store() */ +#define TDB_REPLACE 1 /* A readability place holder */ +#define TDB_INSERT 2 /* Don't overwrite an existing entry */ +#define TDB_MODIFY 3 /* Don't create a new entry */ + +/** + * tdb_fetch - fetch a value from a tdb. + * @tdb: the tdb context returned from tdb_open() + * @key: the key + * @data: pointer to data.
+ * + * This looks up a key in the database and sets it in @data. + * + * If it returns TDB_SUCCESS, the key was found: it is your + * responsibility to call free() on @data->dptr. + * + * Otherwise, it returns an error (usually, TDB_ERR_NOEXIST) and @data is + * undefined. + */ +enum TDB_ERROR tdb_fetch(struct tdb_context *tdb, struct tdb_data key, + struct tdb_data *data); + +/** + * tdb_errorstr - map the tdb error onto a constant readable string + * @ecode: the enum TDB_ERROR to map. + * + * This is useful for displaying errors to users. + */ +const char *tdb_errorstr(enum TDB_ERROR ecode); + +/** + * tdb_append - append a value to a key/value pair in a tdb. + * @tdb: the tdb context returned from tdb_open() + * @key: the key + * @dbuf: the data to append. + * + * This is equivalent to fetching a record, reallocating .dptr to add the + * data, and writing it back, only it's much more efficient. If the key + * doesn't exist, it's equivalent to tdb_store (with an additional hint that + * you expect to expand the record in future). + * + * See Also: + * tdb_fetch(), tdb_store() + */ +enum TDB_ERROR tdb_append(struct tdb_context *tdb, + struct tdb_data key, struct tdb_data dbuf); + +/** + * tdb_delete - delete a key from a tdb. + * @tdb: the tdb context returned from tdb_open() + * @key: the key to delete. + * + * Returns TDB_SUCCESS on success, or an error (usually TDB_ERR_NOEXIST). + * + * See Also: + * tdb_fetch(), tdb_store() + */ +enum TDB_ERROR tdb_delete(struct tdb_context *tdb, struct tdb_data key); + +/** + * tdb_exists - does a key exist in the database? + * @tdb: the tdb context returned from tdb_open() + * @key: the key to search for. + * + * Returns true if it exists, or false if it doesn't or any other error. + */ +bool tdb_exists(struct tdb_context *tdb, TDB_DATA key); + +/** + * tdb_deq - are struct tdb_data equal? 
+ * @a: one struct tdb_data + * @b: another struct tdb_data + */ +static inline bool tdb_deq(struct tdb_data a, struct tdb_data b) +{ + return a.dsize == b.dsize && memcmp(a.dptr, b.dptr, a.dsize) == 0; +} + +/** + * tdb_mkdata - make a struct tdb_data from const data + * @p: the constant pointer + * @len: the length + * + * As the dptr member of struct tdb_data is not constant, you need to + * cast it. This function keeps those casts in one place, as well as + * suppressing the warning some compilers give when casting away a + * qualifier (eg. gcc with -Wcast-qual) + */ +static inline struct tdb_data tdb_mkdata(const void *p, size_t len) +{ + struct tdb_data d; + d.dptr = cast_const(void *, p); + d.dsize = len; + return d; +} + +/** + * tdb_transaction_start - start a transaction + * @tdb: the tdb context returned from tdb_open() + * + * This begins a series of atomic operations. Other processes will be able + * to read the tdb, but not alter it (they will block), nor will they see + * any changes until tdb_transaction_commit() is called. + * + * Note that if the TDB_ALLOW_NESTING flag is set, a tdb_transaction_start() + * within a transaction will succeed, but it's not a real transaction: + * (1) An inner transaction which is committed is not actually committed until + * the outer transaction is; if the outer transaction is cancelled, the + * inner ones are discarded. + * (2) tdb_transaction_cancel() marks the outer transaction as having an error, + * so the final tdb_transaction_commit() will fail. + * (3) the outer transaction will see the results of the inner transaction. + * + * See Also: + * tdb_transaction_cancel, tdb_transaction_commit. + */ +enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb); + +/** + * tdb_transaction_cancel - abandon a transaction + * @tdb: the tdb context returned from tdb_open() + * + * This aborts a transaction, discarding any changes which were made. + * tdb_close() does this implicitly.
+ */ +void tdb_transaction_cancel(struct tdb_context *tdb); + +/** + * tdb_transaction_commit - commit a transaction + * @tdb: the tdb context returned from tdb_open() + * + * This completes a transaction, writing any changes which were made. + * + * fsync() is used to commit the transaction (unless TDB_NOSYNC is set), + * making it robust against machine crashes, but very slow compared to + * other TDB operations. + * + * A failure can only be caused by unexpected errors (eg. I/O or + * memory); there is no point looping on transaction failure. + * + * See Also: + * tdb_transaction_prepare_commit() + */ +enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb); + +/** + * tdb_transaction_prepare_commit - prepare to commit a transaction + * @tdb: the tdb context returned from tdb_open() + * + * This ensures we have the resources to commit a transaction (using + * tdb_transaction_commit): if this succeeds then a transaction will only + * fail if the write() or fsync() calls fail. + * + * If this fails you must still call tdb_transaction_cancel() to cancel + * the transaction. + * + * See Also: + * tdb_transaction_commit() + */ +enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb); + +/** + * tdb_traverse - traverse a TDB + * @tdb: the tdb context returned from tdb_open() + * @fn: the function to call for every key/value pair (or NULL) + * @p: the pointer to hand to @fn + * + * This walks the TDB until all the keys have been traversed, or @fn + * returns non-zero. If the traverse function or other processes are + * changing data or adding or deleting keys, the traverse may be + * unreliable: keys may be skipped or (rarely) visited twice. + * + * There is one specific exception: the special case of deleting the + * current key does not undermine the reliability of the traversal. + * + * On success, returns the number of keys iterated. On error returns + * a negative enum TDB_ERROR value.
+ */ +#define tdb_traverse(tdb, fn, p) \ + tdb_traverse_(tdb, typesafe_cb_preargs(int, void *, (fn), (p), \ + struct tdb_context *, \ + TDB_DATA, TDB_DATA), (p)) + +int64_t tdb_traverse_(struct tdb_context *tdb, + int (*fn)(struct tdb_context *, + TDB_DATA, TDB_DATA, void *), void *p); + +/** + * tdb_parse_record - operate directly on data in the database. + * @tdb: the tdb context returned from tdb_open() + * @key: the key whose record we should hand to @parse + * @parse: the function to call for the data + * @data: the private pointer to hand to @parse (types must match). + * + * This avoids a copy for many cases, by handing you a pointer into + * the memory-mapped database. It also locks the record to prevent + * other accesses at the same time. + * + * Do not alter the data handed to parse()! + */ +#define tdb_parse_record(tdb, key, parse, data) \ + tdb_parse_record_((tdb), (key), \ + typesafe_cb_preargs(enum TDB_ERROR, void *, \ + (parse), (data), \ + TDB_DATA, TDB_DATA), (data)) + +enum TDB_ERROR tdb_parse_record_(struct tdb_context *tdb, + TDB_DATA key, + enum TDB_ERROR (*parse)(TDB_DATA k, + TDB_DATA d, + void *data), + void *data); + +/** + * tdb_get_seqnum - get a database sequence number + * @tdb: the tdb context returned from tdb_open() + * + * This returns a sequence number: any change to the database from a + * tdb context opened with the TDB_SEQNUM flag will cause that number + * to increment. Note that the incrementing is unreliable (it is done + * without locking), so this is only useful as an optimization. + * + * For example, you may have a regular database backup routine which + * does not operate if the sequence number is unchanged. In the + * unlikely event of a failed increment, it will be backed up next + * time any way. + * + * Returns an enum TDB_ERROR (ie. negative) on error. 
+ */ +int64_t tdb_get_seqnum(struct tdb_context *tdb); + +/** + * tdb_firstkey - get the "first" key in a TDB + * @tdb: the tdb context returned from tdb_open() + * @key: pointer to key. + * + * This returns an arbitrary key in the database; with tdb_nextkey() it allows + * open-coded traversal of the database, though it is slightly less efficient + * than tdb_traverse. + * + * It is your responsibility to free @key->dptr on success. + * + * Returns TDB_ERR_NOEXIST if the database is empty. + */ +enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key); + +/** + * tdb_nextkey - get the "next" key in a TDB + * @tdb: the tdb context returned from tdb_open() + * @key: a key returned by tdb_firstkey() or tdb_nextkey(). + * + * This returns another key in the database; it will free @key.dptr for + * your convenience. + * + * Returns TDB_ERR_NOEXIST if there are no more keys. + */ +enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key); + +/** + * tdb_chainlock - lock a record in the TDB + * @tdb: the tdb context returned from tdb_open() + * @key: the key to lock. + * + * This prevents any access occurring to a group of keys including @key, + * even if @key does not exist. This allows primitive atomic updates of + * records without using transactions. + * + * You cannot begin a transaction while holding a tdb_chainlock(), nor can + * you do any operations on any other keys in the database. This also means + * that you cannot hold more than one tdb_chainlock() at a time. + * + * See Also: + * tdb_chainunlock() + */ +enum TDB_ERROR tdb_chainlock(struct tdb_context *tdb, TDB_DATA key); + +/** + * tdb_chainunlock - unlock a record in the TDB + * @tdb: the tdb context returned from tdb_open() + * @key: the key to unlock. + * + * The key must have previously been locked by tdb_chainlock(). 
+ */ +void tdb_chainunlock(struct tdb_context *tdb, TDB_DATA key); + +/** + * tdb_chainlock_read - lock a record in the TDB, for reading + * @tdb: the tdb context returned from tdb_open() + * @key: the key to lock. + * + * This prevents any changes from occurring to a group of keys including @key, + * even if @key does not exist. This allows primitive atomic updates of + * records without using transactions. + * + * You cannot begin a transaction while holding a tdb_chainlock_read(), nor can + * you do any operations on any other keys in the database. This also means + * that you cannot hold more than one tdb_chainlock()/read() at a time. + * + * See Also: + * tdb_chainlock() + */ +enum TDB_ERROR tdb_chainlock_read(struct tdb_context *tdb, TDB_DATA key); + +/** + * tdb_chainunlock_read - unlock a record in the TDB for reading + * @tdb: the tdb context returned from tdb_open() + * @key: the key to unlock. + * + * The key must have previously been locked by tdb_chainlock_read(). + */ +void tdb_chainunlock_read(struct tdb_context *tdb, TDB_DATA key); + +/** + * tdb_lockall - lock the entire TDB + * @tdb: the tdb context returned from tdb_open() + * + * You cannot hold a tdb_chainlock while calling this. It nests, so you + * must call tdb_unlockall as many times as you call tdb_lockall. + */ +enum TDB_ERROR tdb_lockall(struct tdb_context *tdb); + +/** + * tdb_unlockall - unlock the entire TDB + * @tdb: the tdb context returned from tdb_open() + */ +void tdb_unlockall(struct tdb_context *tdb); + +/** + * tdb_lockall_read - lock the entire TDB for reading + * @tdb: the tdb context returned from tdb_open() + * + * This prevents others writing to the database, eg. tdb_delete, tdb_store, + * tdb_append, but not tdb_fetch. + * + * You cannot hold a tdb_chainlock while calling this. It nests, so you + * must call tdb_unlockall_read as many times as you call tdb_lockall_read. 
+ */ +enum TDB_ERROR tdb_lockall_read(struct tdb_context *tdb); + +/** + * tdb_unlockall_read - unlock the entire TDB for reading + * @tdb: the tdb context returned from tdb_open() + */ +void tdb_unlockall_read(struct tdb_context *tdb); + +/** + * tdb_wipe_all - wipe the database clean + * @tdb: the tdb context returned from tdb_open() + * + * Completely erase the database. This is faster than iterating through + * each key and doing tdb_delete. + */ +enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb); + +/** + * tdb_check - check a TDB for consistency + * @tdb: the tdb context returned from tdb_open() + * @check: function to check each key/data pair (or NULL) + * @data: argument for @check, must match type. + * + * This performs a consistency check of the open database, optionally calling + * a check() function on each record so you can do your own data consistency + * checks as well. If check() returns an error, that is returned from + * tdb_check(). + * + * Returns TDB_SUCCESS or an error. + */ +#define tdb_check(tdb, check, data) \ + tdb_check_((tdb), typesafe_cb_preargs(enum TDB_ERROR, void *, \ + (check), (data), \ + struct tdb_data, \ + struct tdb_data), \ + (data)) + +enum TDB_ERROR tdb_check_(struct tdb_context *tdb, + enum TDB_ERROR (*check)(struct tdb_data k, + struct tdb_data d, + void *data), + void *data); + +/** + * tdb_error - get the last error (not threadsafe) + * @tdb: the tdb context returned from tdb_open() + * + * Returns the last error returned by a TDB function. + * + * This makes porting from TDB1 easier, but note that the last error is not + * reliable in threaded programs. + */ +enum TDB_ERROR tdb_error(struct tdb_context *tdb); + +/** + * enum tdb_summary_flags - flags for tdb_summary. + */ +enum tdb_summary_flags { + TDB_SUMMARY_HISTOGRAMS = 1 /* Draw graphs in the summary. 
*/ +}; + +/** + * tdb_summary - return a string describing the TDB state + * @tdb: the tdb context returned from tdb_open() + * @flags: flags to control the summary output. + * @summary: pointer to string to allocate. + * + * This returns a developer-readable string describing the overall + * state of the tdb, such as the percentage used and sizes of records. + * It is designed to provide information about the tdb at a glance + * without displaying any keys or data in the database. + * + * On success, sets @summary to point to a malloc()'ed nul-terminated + * multi-line string. It is your responsibility to free() it. + */ +enum TDB_ERROR tdb_summary(struct tdb_context *tdb, + enum tdb_summary_flags flags, + char **summary); + + +/** + * tdb_get_flags - return the flags for a tdb + * @tdb: the tdb context returned from tdb_open() + * + * This returns the flags on the current tdb. Some of these are caused by + * the flags argument to tdb_open(), others (such as TDB_CONVERT) are + * intuited. + */ +unsigned int tdb_get_flags(struct tdb_context *tdb); + +/** + * tdb_add_flag - set a flag for a tdb + * @tdb: the tdb context returned from tdb_open() + * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM or TDB_ALLOW_NESTING. + * + * You can use this to set a flag on the TDB. You cannot set these flags + * on a TDB_INTERNAL tdb. + */ +void tdb_add_flag(struct tdb_context *tdb, unsigned flag); + +/** + * tdb_remove_flag - unset a flag for a tdb + * @tdb: the tdb context returned from tdb_open() + * @flag: one of TDB_NOLOCK, TDB_NOMMAP, TDB_NOSYNC, TDB_SEQNUM or TDB_ALLOW_NESTING. + * + * You can use this to clear a flag on the TDB. You cannot clear flags + * on a TDB_INTERNAL tdb. + */ +void tdb_remove_flag(struct tdb_context *tdb, unsigned flag); + +/** + * enum tdb_attribute_type - discriminator for union tdb_attribute.
+ */ +enum tdb_attribute_type { + TDB_ATTRIBUTE_LOG = 0, + TDB_ATTRIBUTE_HASH = 1, + TDB_ATTRIBUTE_SEED = 2, + TDB_ATTRIBUTE_STATS = 3, + TDB_ATTRIBUTE_OPENHOOK = 4, + TDB_ATTRIBUTE_FLOCK = 5 +}; + +/** + * tdb_get_attribute - get an attribute for an existing tdb + * @tdb: the tdb context returned from tdb_open() + * @attr: the union tdb_attribute to fill in. + * + * This gets an attribute from a TDB which has previously been set (or + * may return the default values). Set @attr.base.attr to the + * attribute type you want to get. + * + * Currently this does not work for TDB_ATTRIBUTE_OPENHOOK. + */ +enum TDB_ERROR tdb_get_attribute(struct tdb_context *tdb, + union tdb_attribute *attr); + +/** + * tdb_set_attribute - set an attribute for an existing tdb + * @tdb: the tdb context returned from tdb_open() + * @attr: the union tdb_attribute to set. + * + * This sets an attribute on a TDB, overriding any previous attribute + * of the same type. It returns TDB_ERR_EINVAL if the attribute is + * unknown or invalid. + * + * Note that TDB_ATTRIBUTE_HASH, TDB_ATTRIBUTE_SEED and + * TDB_ATTRIBUTE_OPENHOOK cannot currently be set after tdb_open. + */ +enum TDB_ERROR tdb_set_attribute(struct tdb_context *tdb, + const union tdb_attribute *attr); + +/** + * tdb_unset_attribute - reset an attribute for an existing tdb + * @tdb: the tdb context returned from tdb_open() + * @type: the attribute type to unset. + * + * This unsets an attribute on a TDB, returning it to the defaults + * (where applicable). + * + * Note that it only makes sense for TDB_ATTRIBUTE_LOG and TDB_ATTRIBUTE_FLOCK + * to be unset. + */ +void tdb_unset_attribute(struct tdb_context *tdb, + enum tdb_attribute_type type); + +/** + * tdb_name - get the name of a tdb + * @tdb: the tdb context returned from tdb_open() + * + * This returns a copy of the name string, made at tdb_open() time. If that + * argument was NULL (possible for a TDB_INTERNAL db) this will return NULL. + * + * This is mostly useful for logging. 
+ */ +const char *tdb_name(const struct tdb_context *tdb); + +/** + * tdb_fd - get the file descriptor of a tdb + * @tdb: the tdb context returned from tdb_open() + * + * This returns the file descriptor for the underlying database file, or -1 + * for TDB_INTERNAL. + */ +int tdb_fd(const struct tdb_context *tdb); + +/** + * struct tdb_attribute_base - common fields for all tdb attributes. + */ +struct tdb_attribute_base { + enum tdb_attribute_type attr; + union tdb_attribute *next; +}; + +/** + * enum tdb_log_level - log levels for tdb_attribute_log + * @TDB_LOG_ERROR: used to log unrecoverable errors such as I/O errors + * or internal consistency failures. + * @TDB_LOG_USE_ERROR: used to log usage errors such as invalid parameters + * or writing to a read-only database. + * @TDB_LOG_WARNING: used for informational messages on issues which + * are unusual but handled by TDB internally, such + * as a failure to mmap or failure to open /dev/urandom. + */ +enum tdb_log_level { + TDB_LOG_ERROR, + TDB_LOG_USE_ERROR, + TDB_LOG_WARNING +}; + +/** + * struct tdb_attribute_log - log function attribute + * + * This attribute provides a hook for you to log errors. + */ +struct tdb_attribute_log { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_LOG */ + void (*fn)(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, + void *data); + void *data; +}; + +/** + * struct tdb_attribute_hash - hash function attribute + * + * This attribute allows you to provide an alternative hash function. + * This hash function will be handed keys from the database; it will also + * be handed the 8-byte TDB_HASH_MAGIC value for checking the header (the + * tdb_open() will fail if the hash value doesn't match the header). + * + * Note that if your hash function gives different results on + * different machine endians, your tdb will no longer work across + * different architectures! 
+ */ +struct tdb_attribute_hash { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_HASH */ + uint64_t (*fn)(const void *key, size_t len, uint64_t seed, + void *data); + void *data; +}; + +/** + * struct tdb_attribute_seed - hash function seed attribute + * + * The hash function seed is normally taken from /dev/urandom (or equivalent) + * but can be set manually here. This is mainly for testing purposes. + */ +struct tdb_attribute_seed { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_SEED */ + uint64_t seed; +}; + +/** + * struct tdb_attribute_stats - tdb operational statistics + * + * This attribute records statistics of various low-level TDB operations. + * This can be used to assist performance evaluation. This is only + * useful for tdb_get_attribute(). + * + * New fields will be added at the end, hence the "size" argument which + * indicates how large your structure is: it must be filled in before + * calling tdb_get_attribute(), which will overwrite it with the size + * tdb knows about. 
+ */ +struct tdb_attribute_stats { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_STATS */ + size_t size; /* = sizeof(struct tdb_attribute_stats) */ + uint64_t allocs; + uint64_t alloc_subhash; + uint64_t alloc_chain; + uint64_t alloc_bucket_exact; + uint64_t alloc_bucket_max; + uint64_t alloc_leftover; + uint64_t alloc_coalesce_tried; + uint64_t alloc_coalesce_iterate_clash; + uint64_t alloc_coalesce_lockfail; + uint64_t alloc_coalesce_race; + uint64_t alloc_coalesce_succeeded; + uint64_t alloc_coalesce_num_merged; + uint64_t compares; + uint64_t compare_wrong_bucket; + uint64_t compare_wrong_offsetbits; + uint64_t compare_wrong_keylen; + uint64_t compare_wrong_rechash; + uint64_t compare_wrong_keycmp; + uint64_t transactions; + uint64_t transaction_cancel; + uint64_t transaction_nest; + uint64_t transaction_expand_file; + uint64_t transaction_read_direct; + uint64_t transaction_read_direct_fail; + uint64_t transaction_write_direct; + uint64_t transaction_write_direct_fail; + uint64_t expands; + uint64_t frees; + uint64_t locks; + uint64_t lock_lowlevel; + uint64_t lock_nonblock; + uint64_t lock_nonblock_fail; +}; + +/** + * struct tdb_attribute_openhook - tdb special effects hook for open + * + * This attribute contains a function to call once we have the OPEN_LOCK + * for the tdb, but before we've examined its contents. If this succeeds, + * the tdb will be populated if it's then zero-length. + * + * This is a hack to allow support for TDB1-style TDB_CLEAR_IF_FIRST + * behaviour. + */ +struct tdb_attribute_openhook { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_OPENHOOK */ + enum TDB_ERROR (*fn)(int fd, void *data); + void *data; +}; + +/** + * struct tdb_attribute_flock - tdb special effects hook for file locking + * + * This attribute contains function to call to place locks on a file; it can + * be used to support non-blocking operations or lock proxying. + * + * They should return 0 on success, -1 on failure and set errno. 
+ * + * An error will be logged on error if errno is neither EAGAIN nor EINTR + * (normally it would only return EAGAIN if waitflag is false, and + * loop internally on EINTR). + */ +struct tdb_attribute_flock { + struct tdb_attribute_base base; /* .attr = TDB_ATTRIBUTE_FLOCK */ + int (*lock)(int fd,int rw, off_t off, off_t len, bool waitflag, void *); + int (*unlock)(int fd, int rw, off_t off, off_t len, void *); + void *data; +}; + +/** + * union tdb_attribute - tdb attributes. + * + * This represents all the known attributes. + * + * See also: + * struct tdb_attribute_log, struct tdb_attribute_hash, + * struct tdb_attribute_seed, struct tdb_attribute_stats, + * struct tdb_attribute_openhook, struct tdb_attribute_flock. + */ +union tdb_attribute { + struct tdb_attribute_base base; + struct tdb_attribute_log log; + struct tdb_attribute_hash hash; + struct tdb_attribute_seed seed; + struct tdb_attribute_stats stats; + struct tdb_attribute_openhook openhook; + struct tdb_attribute_flock flock; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* tdb2.h */ diff --git a/lib/tdb2/test/external-agent.c b/lib/tdb2/test/external-agent.c new file mode 100644 index 0000000000..055b5de736 --- /dev/null +++ b/lib/tdb2/test/external-agent.c @@ -0,0 +1,250 @@ +#include "external-agent.h" +#include "logging.h" +#include "lock-tracking.h" +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <err.h> +#include <fcntl.h> +#include <stdlib.h> +#include <limits.h> +#include <string.h> +#include <errno.h> +#include <ccan/tdb2/private.h> +#include <ccan/tap/tap.h> +#include <stdio.h> +#include <stdarg.h> + +static struct tdb_context *tdb; + +static enum TDB_ERROR clear_if_first(int fd, void *arg) +{ +/* We hold a lock offset 63 always, so we can tell if anyone is holding it. */ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 63; + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it! 
*/ + diag("agent truncating file!"); + if (ftruncate(fd, 0) != 0) { + return TDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return TDB_ERR_IO; + } + return TDB_SUCCESS; +} + +static enum agent_return do_operation(enum operation op, const char *name) +{ + TDB_DATA k; + enum agent_return ret; + TDB_DATA data; + enum TDB_ERROR ecode; + union tdb_attribute cif; + + if (op != OPEN && op != OPEN_WITH_HOOK && !tdb) { + diag("external: No tdb open!"); + return OTHER_FAILURE; + } + + diag("external: %s", operation_name(op)); + + k = tdb_mkdata(name, strlen(name)); + + locking_would_block = 0; + switch (op) { + case OPEN: + if (tdb) { + diag("Already have tdb %s open", tdb->name); + return OTHER_FAILURE; + } + tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &tap_log_attr); + if (!tdb) { + if (!locking_would_block) + diag("Opening tdb gave %s", strerror(errno)); + forget_locking(); + ret = OTHER_FAILURE; + } else + ret = SUCCESS; + break; + case OPEN_WITH_HOOK: + if (tdb) { + diag("Already have tdb %s open", tdb->name); + return OTHER_FAILURE; + } + cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK; + cif.openhook.base.next = &tap_log_attr; + cif.openhook.fn = clear_if_first; + tdb = tdb_open(name, TDB_DEFAULT, O_RDWR, 0, &cif); + if (!tdb) { + if (!locking_would_block) + diag("Opening tdb gave %s", strerror(errno)); + forget_locking(); + ret = OTHER_FAILURE; + } else + ret = SUCCESS; + break; + case FETCH: + ecode = tdb_fetch(tdb, k, &data); + if (ecode == TDB_ERR_NOEXIST) { + ret = FAILED; + } else if (ecode < 0) { + ret = OTHER_FAILURE; + } else if (!tdb_deq(data, k)) { + ret = OTHER_FAILURE; + free(data.dptr); + } else { + ret = SUCCESS; + free(data.dptr); + } + break; + case STORE: + ret = tdb_store(tdb, k, k, 0) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case TRANSACTION_START: + ret = tdb_transaction_start(tdb) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case TRANSACTION_COMMIT: + ret = tdb_transaction_commit(tdb)==0 ? 
SUCCESS : OTHER_FAILURE; + break; + case NEEDS_RECOVERY: + ret = tdb_needs_recovery(tdb) ? SUCCESS : FAILED; + break; + case CHECK: + ret = tdb_check(tdb, NULL, NULL) == 0 ? SUCCESS : OTHER_FAILURE; + break; + case CLOSE: + ret = tdb_close(tdb) == 0 ? SUCCESS : OTHER_FAILURE; + tdb = NULL; + break; + case SEND_SIGNAL: + /* We do this async */ + ret = SUCCESS; + break; + default: + ret = OTHER_FAILURE; + } + + if (locking_would_block) + ret = WOULD_HAVE_BLOCKED; + + return ret; +} + +struct agent { + int cmdfd, responsefd; +}; + +/* Do this before doing any tdb stuff. Return handle, or NULL. */ +struct agent *prepare_external_agent(void) +{ + int pid, ret; + int command[2], response[2]; + char name[1+PATH_MAX]; + + if (pipe(command) != 0 || pipe(response) != 0) + return NULL; + + pid = fork(); + if (pid < 0) + return NULL; + + if (pid != 0) { + struct agent *agent = malloc(sizeof(*agent)); + + close(command[0]); + close(response[1]); + agent->cmdfd = command[1]; + agent->responsefd = response[0]; + return agent; + } + + close(command[1]); + close(response[0]); + + /* We want to fail, not block. */ + nonblocking_locks = true; + log_prefix = "external: "; + while ((ret = read(command[0], name, sizeof(name))) > 0) { + enum agent_return result; + + result = do_operation(name[0], name+1); + if (write(response[1], &result, sizeof(result)) + != sizeof(result)) + err(1, "Writing response"); + if (name[0] == SEND_SIGNAL) { + struct timeval ten_ms; + ten_ms.tv_sec = 0; + ten_ms.tv_usec = 10000; + select(0, NULL, NULL, NULL, &ten_ms); + kill(getppid(), SIGUSR1); + } + } + exit(0); +} + +/* Ask the external agent to try to do an operation. 
*/ +enum agent_return external_agent_operation(struct agent *agent, + enum operation op, + const char *name) +{ + enum agent_return res; + unsigned int len; + char *string; + + if (!name) + name = ""; + /* Command is one opcode byte followed by the nul-terminated name. */ + len = 1 + strlen(name) + 1; + string = malloc(len); + if (!string) + err(1, "Allocating agent command"); + + string[0] = op; + strcpy(string+1, name); + + /* A failed or short write/read is reported as AGENT_DIED. */ + if (write(agent->cmdfd, string, len) != (ssize_t)len + || read(agent->responsefd, &res, sizeof(res)) != sizeof(res)) + res = AGENT_DIED; + + free(string); + return res; +} + +const char *agent_return_name(enum agent_return ret) +{ + return ret == SUCCESS ? "SUCCESS" + : ret == WOULD_HAVE_BLOCKED ? "WOULD_HAVE_BLOCKED" + : ret == AGENT_DIED ? "AGENT_DIED" + : ret == FAILED ? "FAILED" + : ret == OTHER_FAILURE ? "OTHER_FAILURE" + : "**INVALID**"; +} + +const char *operation_name(enum operation op) +{ + switch (op) { + case OPEN: return "OPEN"; + case OPEN_WITH_HOOK: return "OPEN_WITH_HOOK"; + case FETCH: return "FETCH"; + case STORE: return "STORE"; + case CHECK: return "CHECK"; + case TRANSACTION_START: return "TRANSACTION_START"; + case TRANSACTION_COMMIT: return "TRANSACTION_COMMIT"; + case NEEDS_RECOVERY: return "NEEDS_RECOVERY"; + case SEND_SIGNAL: return "SEND_SIGNAL"; + case CLOSE: return "CLOSE"; + } + return "**INVALID**"; +} + +void free_external_agent(struct agent *agent) +{ + close(agent->cmdfd); + close(agent->responsefd); + free(agent); +} diff --git a/lib/tdb2/test/external-agent.h b/lib/tdb2/test/external-agent.h new file mode 100644 index 0000000000..9eada10750 --- /dev/null +++ b/lib/tdb2/test/external-agent.h @@ -0,0 +1,43 @@ +#ifndef TDB2_TEST_EXTERNAL_AGENT_H +#define TDB2_TEST_EXTERNAL_AGENT_H + +/* For locking tests, we need a different process to try things at + * various times. */ +enum operation { + OPEN, + OPEN_WITH_HOOK, + FETCH, + STORE, + TRANSACTION_START, + TRANSACTION_COMMIT, + NEEDS_RECOVERY, + CHECK, + SEND_SIGNAL, + CLOSE, +}; + +/* Do this before doing any tdb stuff. Return handle, or NULL. 
*/ +struct agent *prepare_external_agent(void); + +enum agent_return { + SUCCESS, + WOULD_HAVE_BLOCKED, + AGENT_DIED, + FAILED, /* For fetch, or NEEDS_RECOVERY */ + OTHER_FAILURE, +}; + +/* Ask the external agent to try to do an operation. + * name == tdb name for OPEN/OPEN_WITH_HOOK, + * record name for FETCH/STORE (store stores name as data too) + */ +enum agent_return external_agent_operation(struct agent *handle, + enum operation op, + const char *name); + +/* Mapping enum -> string. */ +const char *agent_return_name(enum agent_return ret); +const char *operation_name(enum operation op); + +void free_external_agent(struct agent *agent); +#endif /* TDB2_TEST_EXTERNAL_AGENT_H */ diff --git a/lib/tdb2/test/failtest_helper.c b/lib/tdb2/test/failtest_helper.c new file mode 100644 index 0000000000..1358a6c6b2 --- /dev/null +++ b/lib/tdb2/test/failtest_helper.c @@ -0,0 +1,117 @@ +#include "failtest_helper.h" +#include "logging.h" +#include <string.h> +#include <ccan/tap/tap.h> + +/* FIXME: From ccan/str */ +static inline bool strends(const char *str, const char *postfix) +{ + if (strlen(str) < strlen(postfix)) + return false; + + return !strcmp(str + strlen(str) - strlen(postfix), postfix); +} + +/* Does this call have the given type and line, and a file name which is + * either exactly file or ends in "/" followed by file? */ +bool failmatch(const struct failtest_call *call, + const char *file, int line, enum failtest_call_type type) +{ + return call->type == type + && call->line == line + && ((strcmp(call->file, file) == 0) + || (strends(call->file, file) + && (call->file[strlen(call->file) - strlen(file) - 1] + == '/'))); +} + +/* Return the first entry in [start, end) which matches call's file, line + * and type, or NULL if there is none. */ +static const struct failtest_call * +find_repeat(const struct failtest_call *start, const struct failtest_call *end, + const struct failtest_call *call) +{ + const struct failtest_call *i; + + for (i = start; i < end; i++) { + if (failmatch(i, call->file, call->line, call->type)) + return i; + } + return NULL; +} + +static bool is_nonblocking_lock(const struct failtest_call *call) +{ + return call->type == FAILTEST_FCNTL && call->u.fcntl.cmd == F_SETLK; +} + 
+static bool is_unlock(const struct failtest_call *call) +{ + return call->type == FAILTEST_FCNTL + && call->u.fcntl.arg.fl.l_type == F_UNLCK; +} + +bool exit_check_log(struct failtest_call *history, unsigned num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (!history[i].fail) + continue; + /* Failing the /dev/urandom open doesn't count: we fall back. */ + if (failmatch(&history[i], URANDOM_OPEN)) + continue; + + /* Similarly with read fail. */ + if (failmatch(&history[i], URANDOM_READ)) + continue; + + /* Initial allocation of tdb doesn't log. */ + if (failmatch(&history[i], INITIAL_TDB_MALLOC)) + continue; + + /* We don't block "failures" on non-blocking locks. */ + if (is_nonblocking_lock(&history[i])) + continue; + + if (!tap_log_messages) + diag("We didn't log for %u (%s:%u)", + i, history[i].file, history[i].line); + return tap_log_messages != 0; + } + return true; +} + +/* Some places we soldier on despite errors: only fail them once. */ +enum failtest_result +block_repeat_failures(struct failtest_call *history, unsigned num) +{ + const struct failtest_call *i, *last = &history[num-1]; + + if (failmatch(last, INITIAL_TDB_MALLOC) + || failmatch(last, URANDOM_OPEN) + || failmatch(last, URANDOM_READ)) { + if (find_repeat(history, last, last)) + return FAIL_DONT_FAIL; + return FAIL_PROBE; + } + + /* Unlock or non-blocking lock is fail-once. */ + if (is_unlock(last)) { + /* Find a previous unlock at this point? */ + for (i = find_repeat(history, last, last); + i; + i = find_repeat(history, i, last)) { + if (is_unlock(i)) + return FAIL_DONT_FAIL; + } + return FAIL_PROBE; + } else if (is_nonblocking_lock(last)) { + /* Find a previous non-blocking lock at this point? 
*/ + for (i = find_repeat(history, last, last); + i; + i = find_repeat(history, i, last)) { + if (is_nonblocking_lock(i)) + return FAIL_DONT_FAIL; + } + return FAIL_PROBE; + } + + return FAIL_OK; +} diff --git a/lib/tdb2/test/failtest_helper.h b/lib/tdb2/test/failtest_helper.h new file mode 100644 index 0000000000..a62efbad58 --- /dev/null +++ b/lib/tdb2/test/failtest_helper.h @@ -0,0 +1,17 @@ +#ifndef TDB2_TEST_FAILTEST_HELPER_H +#define TDB2_TEST_FAILTEST_HELPER_H +#include <ccan/failtest/failtest.h> +#include <stdbool.h> + +/* FIXME: Check these! */ +#define INITIAL_TDB_MALLOC "open.c", 338, FAILTEST_MALLOC +#define URANDOM_OPEN "open.c", 45, FAILTEST_OPEN +#define URANDOM_READ "open.c", 25, FAILTEST_READ + +bool exit_check_log(struct failtest_call *history, unsigned num); +bool failmatch(const struct failtest_call *call, + const char *file, int line, enum failtest_call_type type); +enum failtest_result +block_repeat_failures(struct failtest_call *history, unsigned num); + +#endif /* TDB2_TEST_FAILTEST_HELPER_H */ diff --git a/lib/tdb2/test/layout.c b/lib/tdb2/test/layout.c new file mode 100644 index 0000000000..31889ad080 --- /dev/null +++ b/lib/tdb2/test/layout.c @@ -0,0 +1,348 @@ +/* TDB tools to create various canned database layouts. 
*/ +#include "layout.h" +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <err.h> +#include "logging.h" + +struct tdb_layout *new_tdb_layout(const char *filename) +{ + struct tdb_layout *layout = malloc(sizeof(*layout)); + layout->filename = filename; + layout->num_elems = 0; + layout->elem = NULL; + return layout; +} + +static void add(struct tdb_layout *layout, union tdb_layout_elem elem) +{ + layout->elem = realloc(layout->elem, + sizeof(layout->elem[0]) + * (layout->num_elems+1)); + layout->elem[layout->num_elems++] = elem; +} + +void tdb_layout_add_freetable(struct tdb_layout *layout) +{ + union tdb_layout_elem elem; + elem.base.type = FREETABLE; + add(layout, elem); +} + +void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len, + unsigned ftable) +{ + union tdb_layout_elem elem; + elem.base.type = FREE; + elem.free.len = len; + elem.free.ftable_num = ftable; + add(layout, elem); +} + +static struct tdb_data dup_key(struct tdb_data key) +{ + struct tdb_data ret; + ret.dsize = key.dsize; + ret.dptr = malloc(ret.dsize); + memcpy(ret.dptr, key.dptr, ret.dsize); + return ret; +} + +void tdb_layout_add_used(struct tdb_layout *layout, + TDB_DATA key, TDB_DATA data, + tdb_len_t extra) +{ + union tdb_layout_elem elem; + elem.base.type = DATA; + elem.used.key = dup_key(key); + elem.used.data = dup_key(data); + elem.used.extra = extra; + add(layout, elem); +} + +static tdb_len_t free_record_len(tdb_len_t len) +{ + return sizeof(struct tdb_used_record) + len; +} + +static tdb_len_t data_record_len(struct tle_used *used) +{ + tdb_len_t len; + len = sizeof(struct tdb_used_record) + + used->key.dsize + used->data.dsize + used->extra; + assert(len >= sizeof(struct tdb_free_record)); + return len; +} + +static tdb_len_t hashtable_len(struct tle_hashtable *htable) +{ + return sizeof(struct tdb_used_record) + + (sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS) + + htable->extra; +} + +static tdb_len_t freetable_len(struct tle_freetable *ftable) +{ + 
return sizeof(struct tdb_freetable); +} + +static void set_free_record(void *mem, tdb_len_t len) +{ + /* We do all the work in add_to_freetable */ +} + +static void add_zero_pad(struct tdb_used_record *u, size_t len, size_t extra) +{ + if (extra) + ((char *)(u + 1))[len] = '\0'; +} + +static void set_data_record(void *mem, struct tdb_context *tdb, + struct tle_used *used) +{ + struct tdb_used_record *u = mem; + + set_header(tdb, u, TDB_USED_MAGIC, used->key.dsize, used->data.dsize, + used->key.dsize + used->data.dsize + used->extra, + tdb_hash(tdb, used->key.dptr, used->key.dsize)); + memcpy(u + 1, used->key.dptr, used->key.dsize); + memcpy((char *)(u + 1) + used->key.dsize, + used->data.dptr, used->data.dsize); + add_zero_pad(u, used->key.dsize + used->data.dsize, used->extra); +} + +static void set_hashtable(void *mem, struct tdb_context *tdb, + struct tle_hashtable *htable) +{ + struct tdb_used_record *u = mem; + tdb_len_t len = sizeof(tdb_off_t) << TDB_SUBLEVEL_HASH_BITS; + + set_header(tdb, u, TDB_HTABLE_MAGIC, 0, len, len + htable->extra, 0); + memset(u + 1, 0, len); + add_zero_pad(u, len, htable->extra); +} + +static void set_freetable(void *mem, struct tdb_context *tdb, + struct tle_freetable *freetable, struct tdb_header *hdr, + tdb_off_t last_ftable) +{ + struct tdb_freetable *ftable = mem; + memset(ftable, 0, sizeof(*ftable)); + set_header(tdb, &ftable->hdr, TDB_FTABLE_MAGIC, 0, + sizeof(*ftable) - sizeof(ftable->hdr), + sizeof(*ftable) - sizeof(ftable->hdr), 0); + + if (last_ftable) { + ftable = (struct tdb_freetable *)((char *)hdr + last_ftable); + ftable->next = freetable->base.off; + } else { + hdr->free_table = freetable->base.off; + } +} + +static void add_to_freetable(struct tdb_context *tdb, + tdb_off_t eoff, + tdb_off_t elen, + unsigned ftable, + struct tle_freetable *freetable) +{ + tdb->ftable_off = freetable->base.off; + tdb->ftable = ftable; + add_free_record(tdb, eoff, sizeof(struct tdb_used_record) + elen, + TDB_LOCK_WAIT, false); +} + 
+static tdb_off_t hbucket_off(tdb_off_t group_start, unsigned ingroup) +{ + return group_start + + (ingroup % (1 << TDB_HASH_GROUP_BITS)) * sizeof(tdb_off_t); +} + +/* Get bits from a value. */ +static uint32_t bits(uint64_t val, unsigned start, unsigned num) +{ + assert(num <= 32); + return (val >> start) & ((1U << num) - 1); +} + +/* We take bits from the top: that way we can lock whole sections of the hash + * by using lock ranges. */ +static uint32_t use_bits(uint64_t h, unsigned num, unsigned *used) +{ + *used += num; + return bits(h, 64 - *used, num); +} + +static tdb_off_t encode_offset(tdb_off_t new_off, unsigned bucket, + uint64_t h) +{ + return bucket + | new_off + | ((uint64_t)bits(h, 64 - TDB_OFF_UPPER_STEAL_EXTRA, + TDB_OFF_UPPER_STEAL_EXTRA) + << TDB_OFF_HASH_EXTRA_BIT); +} + +/* FIXME: Our hash table handling here is primitive: we don't expand! */ +static void add_to_hashtable(struct tdb_context *tdb, + tdb_off_t eoff, + struct tdb_data key) +{ + uint64_t h = tdb_hash(tdb, key.dptr, key.dsize); + tdb_off_t b_off, group_start; + unsigned i, group, in_group; + unsigned used = 0; + + group = use_bits(h, TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS, &used); + in_group = use_bits(h, TDB_HASH_GROUP_BITS, &used); + + group_start = offsetof(struct tdb_header, hashtable) + + group * (sizeof(tdb_off_t) << TDB_HASH_GROUP_BITS); + + for (i = 0; i < (1 << TDB_HASH_GROUP_BITS); i++) { + unsigned bucket = (in_group + i) % (1 << TDB_HASH_GROUP_BITS); + + b_off = hbucket_off(group_start, bucket); + if (tdb_read_off(tdb, b_off) == 0) { + tdb_write_off(tdb, b_off, + encode_offset(eoff, bucket, h)); + return; + } + } + abort(); +} + +static struct tle_freetable *find_ftable(struct tdb_layout *layout, unsigned num) +{ + unsigned i; + + for (i = 0; i < layout->num_elems; i++) { + if (layout->elem[i].base.type != FREETABLE) + continue; + if (num == 0) + return &layout->elem[i].ftable; + num--; + } + abort(); +} + +/* FIXME: Support TDB_CONVERT */ +struct tdb_context 
*tdb_layout_get(struct tdb_layout *layout) +{ + unsigned int i; + tdb_off_t off, len, last_ftable; + char *mem; + struct tdb_context *tdb; + + off = sizeof(struct tdb_header); + + /* First pass of layout: calc lengths */ + for (i = 0; i < layout->num_elems; i++) { + union tdb_layout_elem *e = &layout->elem[i]; + e->base.off = off; + switch (e->base.type) { + case FREETABLE: + len = freetable_len(&e->ftable); + break; + case FREE: + len = free_record_len(e->free.len); + break; + case DATA: + len = data_record_len(&e->used); + break; + case HASHTABLE: + len = hashtable_len(&e->hashtable); + break; + default: + abort(); + } + off += len; + } + + mem = malloc(off); + /* Fill with some weird pattern. */ + memset(mem, 0x99, off); + /* Now populate our header, cribbing from a real TDB header. */ + tdb = tdb_open(NULL, TDB_INTERNAL, O_RDWR, 0, &tap_log_attr); + memcpy(mem, tdb->file->map_ptr, sizeof(struct tdb_header)); + + /* Mug the tdb we have to make it use this. */ + free(tdb->file->map_ptr); + tdb->file->map_ptr = mem; + tdb->file->map_size = off; + + last_ftable = 0; + for (i = 0; i < layout->num_elems; i++) { + union tdb_layout_elem *e = &layout->elem[i]; + switch (e->base.type) { + case FREETABLE: + set_freetable(mem + e->base.off, tdb, &e->ftable, + (struct tdb_header *)mem, last_ftable); + last_ftable = e->base.off; + break; + case FREE: + set_free_record(mem + e->base.off, e->free.len); + break; + case DATA: + set_data_record(mem + e->base.off, tdb, &e->used); + break; + case HASHTABLE: + set_hashtable(mem + e->base.off, tdb, &e->hashtable); + break; + } + } + /* Must have a free table! */ + assert(last_ftable); + + /* Now fill the free and hash tables. 
*/ + for (i = 0; i < layout->num_elems; i++) { + union tdb_layout_elem *e = &layout->elem[i]; + switch (e->base.type) { + case FREE: + add_to_freetable(tdb, e->base.off, e->free.len, + e->free.ftable_num, + find_ftable(layout, e->free.ftable_num)); + break; + case DATA: + add_to_hashtable(tdb, e->base.off, e->used.key); + break; + default: + break; + } + } + + tdb->ftable_off = find_ftable(layout, 0)->base.off; + + /* Get physical if they asked for it. */ + if (layout->filename) { + int fd = open(layout->filename, O_WRONLY|O_TRUNC|O_CREAT, + 0600); + if (fd < 0) + err(1, "opening %s for writing", layout->filename); + if (write(fd, tdb->file->map_ptr, tdb->file->map_size) + != tdb->file->map_size) + err(1, "writing %s", layout->filename); + close(fd); + tdb_close(tdb); + /* NOMMAP is for lockcheck. */ + tdb = tdb_open(layout->filename, TDB_NOMMAP, O_RDWR, 0, + &tap_log_attr); + } + + return tdb; +} + +void tdb_layout_free(struct tdb_layout *layout) +{ + unsigned int i; + + for (i = 0; i < layout->num_elems; i++) { + if (layout->elem[i].base.type == DATA) { + free(layout->elem[i].used.key.dptr); + free(layout->elem[i].used.data.dptr); + } + } + free(layout->elem); + free(layout); +} diff --git a/lib/tdb2/test/layout.h b/lib/tdb2/test/layout.h new file mode 100644 index 0000000000..6e2e6657a7 --- /dev/null +++ b/lib/tdb2/test/layout.h @@ -0,0 +1,68 @@ +#ifndef TDB2_TEST_LAYOUT_H +#define TDB2_TEST_LAYOUT_H +#include <ccan/tdb2/private.h> + +struct tdb_layout *new_tdb_layout(const char *filename); +void tdb_layout_add_freetable(struct tdb_layout *layout); +void tdb_layout_add_free(struct tdb_layout *layout, tdb_len_t len, + unsigned ftable); +void tdb_layout_add_used(struct tdb_layout *layout, + TDB_DATA key, TDB_DATA data, + tdb_len_t extra); +#if 0 /* FIXME: Allow allocation of subtables */ +void tdb_layout_add_hashtable(struct tdb_layout *layout, + int htable_parent, /* -1 == toplevel */ + unsigned int bucket, + tdb_len_t extra); +#endif +struct tdb_context 
*tdb_layout_get(struct tdb_layout *layout); +void tdb_layout_free(struct tdb_layout *layout); + +enum layout_type { + FREETABLE, FREE, DATA, HASHTABLE, +}; + +/* Shared by all union members. */ +struct tle_base { + enum layout_type type; + tdb_off_t off; +}; + +struct tle_freetable { + struct tle_base base; +}; + +struct tle_free { + struct tle_base base; + tdb_len_t len; + unsigned ftable_num; +}; + +struct tle_used { + struct tle_base base; + TDB_DATA key; + TDB_DATA data; + tdb_len_t extra; +}; + +struct tle_hashtable { + struct tle_base base; + int parent; + unsigned int bucket; + tdb_len_t extra; +}; + +union tdb_layout_elem { + struct tle_base base; + struct tle_freetable ftable; + struct tle_free free; + struct tle_used used; + struct tle_hashtable hashtable; +}; + +struct tdb_layout { + const char *filename; + unsigned int num_elems; + union tdb_layout_elem *elem; +}; +#endif /* TDB2_TEST_LAYOUT_H */ diff --git a/lib/tdb2/test/lock-tracking.c b/lib/tdb2/test/lock-tracking.c new file mode 100644 index 0000000000..05dba32fd3 --- /dev/null +++ b/lib/tdb2/test/lock-tracking.c @@ -0,0 +1,147 @@ +/* We save the locks so we can reacquire them. */ +#include <unistd.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <ccan/tap/tap.h> +#include <ccan/tdb2/private.h> +#include "lock-tracking.h" + +/* One recorded fcntl lock (start offset, length and lock type), kept on the + * singly-linked "locks" list. */ +struct lock { + struct lock *next; + unsigned int off; + unsigned int len; + int type; +}; +static struct lock *locks; +int locking_errors = 0; +bool suppress_lockcheck = false; +bool nonblocking_locks; +int locking_would_block = 0; +void (*unlock_callback)(int fd); + +int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ) +{ + va_list ap; + int ret, arg3; + struct flock *fl; + bool may_block = false; + + if (cmd != F_SETLK && cmd != F_SETLKW) { + /* This may be totally bogus, but we don't know in general. 
*/ + va_start(ap, cmd); + arg3 = va_arg(ap, int); + va_end(ap); + + return fcntl(fd, cmd, arg3); + } + + va_start(ap, cmd); + fl = va_arg(ap, struct flock *); + va_end(ap); + + if (cmd == F_SETLKW && nonblocking_locks) { + cmd = F_SETLK; + may_block = true; + } + ret = fcntl(fd, cmd, fl); + + /* Detect when we failed, but might have been OK if we waited. */ + if (may_block && ret == -1 && (errno == EAGAIN || errno == EACCES)) { + locking_would_block++; + } + + if (fl->l_type == F_UNLCK) { + struct lock **l; + struct lock *old = NULL; + + for (l = &locks; *l; l = &(*l)->next) { + if ((*l)->off == fl->l_start + && (*l)->len == fl->l_len) { + if (ret == 0) { + old = *l; + *l = (*l)->next; + free(old); + } + break; + } + } + if (!old && !suppress_lockcheck) { + diag("Unknown unlock %u@%u - %i", + (int)fl->l_len, (int)fl->l_start, ret); + locking_errors++; + } + } else { + struct lock *new, *i; + unsigned int fl_end = fl->l_start + fl->l_len; + if (fl->l_len == 0) + fl_end = (unsigned int)-1; + + /* Check for overlaps: we shouldn't do this. */ + for (i = locks; i; i = i->next) { + unsigned int i_end = i->off + i->len; + if (i->len == 0) + i_end = (unsigned int)-1; + + if (fl->l_start >= i->off && fl->l_start < i_end) + break; + if (fl_end > i->off && fl_end < i_end) + break; + + /* tdb_allrecord_lock does this, handle adjacent: */ + if (fl->l_start > TDB_HASH_LOCK_START + && fl->l_start == i_end && fl->l_type == i->type) { + if (ret == 0) { + i->len = fl->l_len + ? i->len + fl->l_len + : 0; + } + goto done; + } + } + if (i) { + /* Special case: upgrade of allrecord lock. */ + if (i->type == F_RDLCK && fl->l_type == F_WRLCK + && i->off == TDB_HASH_LOCK_START + && fl->l_start == TDB_HASH_LOCK_START + && i->len == 0 + && fl->l_len == 0) { + if (ret == 0) + i->type = F_WRLCK; + goto done; + } + if (!suppress_lockcheck) { + diag("%s lock %u@%u overlaps %u@%u", + fl->l_type == F_WRLCK ? 
"write" : "read", + (int)fl->l_len, (int)fl->l_start, + i->len, (int)i->off); + locking_errors++; + } + } + + if (ret == 0) { + new = malloc(sizeof *new); + new->off = fl->l_start; + new->len = fl->l_len; + new->type = fl->l_type; + new->next = locks; + locks = new; + } + } +done: + if (ret == 0 && fl->l_type == F_UNLCK && unlock_callback) + unlock_callback(fd); + return ret; +} + +unsigned int forget_locking(void) +{ + unsigned int num = 0; + while (locks) { + struct lock *next = locks->next; + free(locks); + locks = next; + num++; + } + return num; +} diff --git a/lib/tdb2/test/lock-tracking.h b/lib/tdb2/test/lock-tracking.h new file mode 100644 index 0000000000..f2c9c44653 --- /dev/null +++ b/lib/tdb2/test/lock-tracking.h @@ -0,0 +1,25 @@ +#ifndef LOCK_TRACKING_H +#define LOCK_TRACKING_H +#include <stdbool.h> + +/* Set this if you want a callback after fnctl unlock. */ +extern void (*unlock_callback)(int fd); + +/* Replacement fcntl. */ +int fcntl_with_lockcheck(int fd, int cmd, ... /* arg */ ); + +/* Discard locking info: returns number of locks outstanding. */ +unsigned int forget_locking(void); + +/* Number of errors in locking. */ +extern int locking_errors; + +/* Suppress lock checking. */ +extern bool suppress_lockcheck; + +/* Make all locks non-blocking. */ +extern bool nonblocking_locks; + +/* Number of times we failed a lock because we made it non-blocking. 
*/ +extern int locking_would_block; +#endif /* LOCK_TRACKING_H */ diff --git a/lib/tdb2/test/logging.c b/lib/tdb2/test/logging.c new file mode 100644 index 0000000000..d32cfa9b59 --- /dev/null +++ b/lib/tdb2/test/logging.c @@ -0,0 +1,24 @@ +#include <stdio.h> +#include <stdlib.h> +#include <ccan/tap/tap.h> +#include "logging.h" + +unsigned tap_log_messages; +const char *log_prefix = ""; +bool suppress_logging; + +union tdb_attribute tap_log_attr = { + .log = { .base = { .attr = TDB_ATTRIBUTE_LOG }, + .fn = tap_log_fn } +}; + +void tap_log_fn(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, void *priv) +{ + if (suppress_logging) + return; + + diag("tdb log level %u: %s%s", level, log_prefix, message); + tap_log_messages++; +} diff --git a/lib/tdb2/test/logging.h b/lib/tdb2/test/logging.h new file mode 100644 index 0000000000..d172f867fd --- /dev/null +++ b/lib/tdb2/test/logging.h @@ -0,0 +1,15 @@ +#ifndef TDB2_TEST_LOGGING_H +#define TDB2_TEST_LOGGING_H +#include <ccan/tdb2/tdb2.h> +#include <stdbool.h> +#include <string.h> + +extern bool suppress_logging; +extern const char *log_prefix; +extern unsigned tap_log_messages; +extern union tdb_attribute tap_log_attr; + +void tap_log_fn(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, void *priv); +#endif /* TDB2_TEST_LOGGING_H */ diff --git a/lib/tdb2/test/run-001-encode.c b/lib/tdb2/test/run-001-encode.c new file mode 100644 index 0000000000..ffa4b93c02 --- /dev/null +++ b/lib/tdb2/test/run-001-encode.c @@ -0,0 +1,48 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_used_record rec; + struct tdb_context tdb = { .log_fn = tap_log_fn }; + + plan_tests(64 + 32 + 
/*
 * Straightforward reference for fls64(): the 1-based position of the most
 * significant set bit of num, or 0 when num has no bits set.
 */
static unsigned int dumb_fls(uint64_t num)
{
	unsigned int bits = 0;

	/* Each shift drops the lowest bit; count shifts until nothing is left. */
	while (num != 0) {
		num >>= 1;
		bits++;
	}
	return bits;
}
/*
 * run-01-new_database: create a database under each flag combination and
 * verify that it opens, passes tdb_check() and logs nothing.
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	struct tdb_context *tdb;
	/* Flag combinations handed to tdb_open(), one loop pass each. */
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };

	failtest_init(argc, argv);
	failtest_hook = block_repeat_failures;
	failtest_exit_check = exit_check_log;
	/* Three ok1() checks per flag set: open, check, log count. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 3);
	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		/* O_CREAT|O_TRUNC: every pass starts from a fresh file. */
		tdb = tdb_open("run-new_database.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (!ok1(tdb))
			failtest_exit(exit_status());
		if (tdb) {
			bool ok = ok1(tdb_check(tdb, NULL, NULL) == 0);
			/* Close first so the handle is released before bailing out. */
			tdb_close(tdb);
			if (!ok)
				failtest_exit(exit_status());
		}
		/* Any logged message ends the run early. */
		if (!ok1(tap_log_messages == 0))
			break;
	}
	failtest_exit(exit_status());
}
/*
 * run-02-expand: for each flag combination, expand the database twice (by 1
 * and by 1024) while holding a hash lock, and verify that the mapped size
 * grew and the database still passes tdb_check().
 *
 * failtest_suppress is cleared only around the tdb_expand() calls, so those
 * are the only calls for which suppress_failure() defers to
 * block_repeat_failures().
 */
int main(int argc, char *argv[])
{
	unsigned int i;
	uint64_t val;
	struct tdb_context *tdb;
	/* Flag combinations handed to tdb_open(), one loop pass each. */
	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
			TDB_NOMMAP|TDB_CONVERT };

	/* Eleven ok1() checks per flag set plus the final log-count check. */
	plan_tests(sizeof(flags) / sizeof(flags[0]) * 11 + 1);

	failtest_init(argc, argv);
	failtest_hook = suppress_failure;
	failtest_exit_check = exit_check_log;

	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
		failtest_suppress = true;
		tdb = tdb_open("run-expand.tdb", flags[i],
			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
		if (!ok1(tdb))
			break;

		/* Remember the size before expanding so growth can be checked. */
		val = tdb->file->map_size;
		/* Need some hash lock for expand. */
		ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
		failtest_suppress = false;
		if (!ok1(tdb_expand(tdb, 1) == 0)) {
			/* Suppress again so the cleanup close is not failed too. */
			failtest_suppress = true;
			tdb_close(tdb);
			break;
		}
		failtest_suppress = true;

		ok1(tdb->file->map_size >= val + 1 * TDB_EXTENSION_FACTOR);
		ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
		ok1(tdb_check(tdb, NULL, NULL) == 0);

		/* Second round: a larger expansion from the new size. */
		val = tdb->file->map_size;
		ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0);
		failtest_suppress = false;
		if (!ok1(tdb_expand(tdb, 1024) == 0)) {
			failtest_suppress = true;
			tdb_close(tdb);
			break;
		}
		failtest_suppress = true;
		ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0);
		ok1(tdb->file->map_size >= val + 1024 * TDB_EXTENSION_FACTOR);
		ok1(tdb_check(tdb, NULL, NULL) == 0);
		tdb_close(tdb);
	}

	ok1(tap_log_messages == 0);
	failtest_exit(exit_status());
}
b/lib/tdb2/test/run-03-coalesce.c @@ -0,0 +1,170 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" +#include "layout.h" + +static tdb_len_t free_record_length(struct tdb_context *tdb, tdb_off_t off) +{ + struct tdb_free_record f; + enum TDB_ERROR ecode; + + ecode = tdb_read_convert(tdb, off, &f, sizeof(f)); + if (ecode != TDB_SUCCESS) + return ecode; + if (frec_magic(&f) != TDB_FREE_MAGIC) + return TDB_ERR_CORRUPT; + return frec_len(&f); +} + +int main(int argc, char *argv[]) +{ + tdb_off_t b_off, test; + struct tdb_context *tdb; + struct tdb_layout *layout; + struct tdb_data data, key; + tdb_len_t len; + + /* FIXME: Test TDB_CONVERT */ + /* FIXME: Test lock order fail. */ + + plan_tests(42); + data = tdb_mkdata("world", 5); + key = tdb_mkdata("hello", 5); + + /* No coalescing can be done due to EOF */ + layout = new_tdb_layout("run-03-coalesce.tdb"); + tdb_layout_add_freetable(layout); + len = 1024; + tdb_layout_add_free(layout, len, 0); + tdb = tdb_layout_get(layout); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(free_record_length(tdb, layout->elem[1].base.off) == len); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(tdb->ftable_off, size_to_bucket(len)); + /* Lock and fail to coalesce. 
*/ + ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); + test = layout->elem[1].base.off; + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, len, &test) + == 0); + tdb_unlock_free_bucket(tdb, b_off); + ok1(free_record_length(tdb, layout->elem[1].base.off) == len); + ok1(test == layout->elem[1].base.off); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + tdb_layout_free(layout); + + /* No coalescing can be done due to used record */ + layout = new_tdb_layout("run-03-coalesce.tdb"); + tdb_layout_add_freetable(layout); + tdb_layout_add_free(layout, 1024, 0); + tdb_layout_add_used(layout, key, data, 6); + tdb = tdb_layout_get(layout); + ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024)); + /* Lock and fail to coalesce. */ + ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); + test = layout->elem[1].base.off; + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test) + == 0); + tdb_unlock_free_bucket(tdb, b_off); + ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); + ok1(test == layout->elem[1].base.off); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + tdb_layout_free(layout); + + /* Coalescing can be done due to two free records, then EOF */ + layout = new_tdb_layout("run-03-coalesce.tdb"); + tdb_layout_add_freetable(layout); + tdb_layout_add_free(layout, 1024, 0); + tdb_layout_add_free(layout, 2048, 0); + tdb = tdb_layout_get(layout); + ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(tdb, layout->elem[2].base.off) == 2048); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Figure out which bucket (first) free entry is. */ + b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. 
*/ + ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct tdb_used_record) + 2048); + /* Should tell us it's erased this one... */ + ok1(test == TDB_ERR_NOEXIST); + ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0); + ok1(free_record_length(tdb, layout->elem[1].base.off) + == 1024 + sizeof(struct tdb_used_record) + 2048); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + tdb_layout_free(layout); + + /* Coalescing can be done due to two free records, then data */ + layout = new_tdb_layout("run-03-coalesce.tdb"); + tdb_layout_add_freetable(layout); + tdb_layout_add_free(layout, 1024, 0); + tdb_layout_add_free(layout, 512, 0); + tdb_layout_add_used(layout, key, data, 6); + tdb = tdb_layout_get(layout); + ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(tdb, layout->elem[2].base.off) == 512); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. 
*/ + ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct tdb_used_record) + 512); + ok1(tdb->file->allrecord_lock.count == 0 && tdb->file->num_lockrecs == 0); + ok1(free_record_length(tdb, layout->elem[1].base.off) + == 1024 + sizeof(struct tdb_used_record) + 512); + ok1(test == TDB_ERR_NOEXIST); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + tdb_layout_free(layout); + + /* Coalescing can be done due to three free records, then EOF */ + layout = new_tdb_layout("run-03-coalesce.tdb"); + tdb_layout_add_freetable(layout); + tdb_layout_add_free(layout, 1024, 0); + tdb_layout_add_free(layout, 512, 0); + tdb_layout_add_free(layout, 256, 0); + tdb = tdb_layout_get(layout); + ok1(free_record_length(tdb, layout->elem[1].base.off) == 1024); + ok1(free_record_length(tdb, layout->elem[2].base.off) == 512); + ok1(free_record_length(tdb, layout->elem[3].base.off) == 256); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Figure out which bucket free entry is. */ + b_off = bucket_off(tdb->ftable_off, size_to_bucket(1024)); + /* Lock and coalesce. 
*/ + ok1(tdb_lock_free_bucket(tdb, b_off, TDB_LOCK_WAIT) == 0); + test = layout->elem[2].base.off; + ok1(coalesce(tdb, layout->elem[1].base.off, b_off, 1024, &test) + == 1024 + sizeof(struct tdb_used_record) + 512 + + sizeof(struct tdb_used_record) + 256); + ok1(tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0); + ok1(free_record_length(tdb, layout->elem[1].base.off) + == 1024 + sizeof(struct tdb_used_record) + 512 + + sizeof(struct tdb_used_record) + 256); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + tdb_layout_free(layout); + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-04-basichash.c b/lib/tdb2/test/run-04-basichash.c new file mode 100644 index 0000000000..62031bdb40 --- /dev/null +++ b/lib/tdb2/test/run-04-basichash.c @@ -0,0 +1,267 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +/* We rig the hash so adjacent-numbered records always clash. 
*/ +static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv) +{ + return ((uint64_t)*(const unsigned int *)key) + << (64 - TDB_TOPLEVEL_HASH_BITS - 1); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + unsigned int v; + struct tdb_used_record rec; + struct tdb_data key = { (unsigned char *)&v, sizeof(v) }; + struct tdb_data dbuf = { (unsigned char *)&v, sizeof(v) }; + union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = clash } }; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT, + }; + + hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (91 + (2 * ((1 << TDB_HASH_GROUP_BITS) - 1))) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct hash_info h; + tdb_off_t new_off, off, subhash; + + tdb = tdb_open("run-04-basichash.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(tdb); + if (!tdb) + continue; + + v = 0; + /* Should not find it. */ + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 0. */ + ok1(h.group_start == offsetof(struct tdb_header, hashtable)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS); + + /* Should have lock on bucket 0 */ + ok1(h.hlock_start == 0); + ok1(h.hlock_range == + 1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS))); + ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1); + ok1((tdb->flags & TDB_NOLOCK) + || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + /* Allocate a new record. */ + new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, + TDB_USED_MAGIC, false); + ok1(!TDB_OFF_IS_ERR(new_off)); + + /* We should be able to add it now. 
*/ + ok1(add_to_hash(tdb, &h, new_off) == 0); + + /* Make sure we fill it in for later finding. */ + off = new_off + sizeof(struct tdb_used_record); + ok1(!tdb->methods->twrite(tdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize)); + + /* We should be able to unlock that OK. */ + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + + /* Database should be consistent. */ + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Now, this should give a successful lookup. */ + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 0. */ + ok1(h.group_start == offsetof(struct tdb_header, hashtable)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS); + + /* Should have lock on bucket 0 */ + ok1(h.hlock_start == 0); + ok1(h.hlock_range == + 1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS))); + ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1); + ok1((tdb->flags & TDB_NOLOCK) + || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + + /* Database should be consistent. */ + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Test expansion. */ + v = 1; + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 1. 
*/ + ok1(h.group_start == offsetof(struct tdb_header, hashtable)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 1); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS); + + /* Should have lock on bucket 0 */ + ok1(h.hlock_start == 0); + ok1(h.hlock_range == + 1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS))); + ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1); + ok1((tdb->flags & TDB_NOLOCK) + || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + /* Make it expand 0'th bucket. */ + ok1(expand_group(tdb, &h) == 0); + /* First one should be subhash, next should be empty. */ + ok1(is_subhash(h.group[0])); + subhash = (h.group[0] & TDB_OFF_MASK); + for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++) + ok1(h.group[j] == 0); + + ok1(tdb_write_convert(tdb, h.group_start, + h.group, sizeof(h.group)) == 0); + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + + /* Should be happy with expansion. */ + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Should be able to find it. */ + v = 0; + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in expanded group 0, bucket 0. */ + ok1(h.group_start == subhash + sizeof(struct tdb_used_record)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS + + TDB_SUBLEVEL_HASH_BITS); + + /* Should have lock on bucket 0 */ + ok1(h.hlock_start == 0); + ok1(h.hlock_range == + 1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS))); + ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1); + ok1((tdb->flags & TDB_NOLOCK) + || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + /* Simple delete should work. 
*/ + ok1(delete_from_hash(tdb, &h) == 0); + ok1(add_free_record(tdb, new_off, + sizeof(struct tdb_used_record) + + rec_key_length(&rec) + + rec_data_length(&rec) + + rec_extra_padding(&rec), + TDB_LOCK_NOWAIT, false) == 0); + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Test second-level expansion: should expand 0th bucket. */ + v = 0; + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 0. */ + ok1(h.group_start == subhash + sizeof(struct tdb_used_record)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS+TDB_SUBLEVEL_HASH_BITS); + + /* Should have lock on bucket 0 */ + ok1(h.hlock_start == 0); + ok1(h.hlock_range == + 1ULL << (64-(TDB_TOPLEVEL_HASH_BITS-TDB_HASH_GROUP_BITS))); + ok1((tdb->flags & TDB_NOLOCK) || tdb->file->num_lockrecs == 1); + ok1((tdb->flags & TDB_NOLOCK) + || tdb->file->lockrecs[0].off == TDB_HASH_LOCK_START); + /* FIXME: Check lock length */ + + ok1(expand_group(tdb, &h) == 0); + /* First one should be subhash, next should be empty. */ + ok1(is_subhash(h.group[0])); + subhash = (h.group[0] & TDB_OFF_MASK); + for (j = 1; j < (1 << TDB_HASH_GROUP_BITS); j++) + ok1(h.group[j] == 0); + ok1(tdb_write_convert(tdb, h.group_start, + h.group, sizeof(h.group)) == 0); + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + + /* Should be happy with expansion. */ + ok1(tdb_check(tdb, NULL, NULL) == 0); + + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) == 0); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 0. 
*/ + ok1(h.group_start == subhash + sizeof(struct tdb_used_record)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS + + TDB_SUBLEVEL_HASH_BITS * 2); + + /* We should be able to add it now. */ + /* Allocate a new record. */ + new_off = alloc(tdb, key.dsize, dbuf.dsize, h.h, + TDB_USED_MAGIC, false); + ok1(!TDB_OFF_IS_ERR(new_off)); + ok1(add_to_hash(tdb, &h, new_off) == 0); + + /* Make sure we fill it in for later finding. */ + off = new_off + sizeof(struct tdb_used_record); + ok1(!tdb->methods->twrite(tdb, off, key.dptr, key.dsize)); + off += key.dsize; + ok1(!tdb->methods->twrite(tdb, off, dbuf.dptr, dbuf.dsize)); + + /* We should be able to unlock that OK. */ + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_WRLCK) == 0); + + /* Database should be consistent. */ + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Should be able to find it. */ + v = 0; + ok1(find_and_lock(tdb, key, F_WRLCK, &h, &rec, NULL) + == new_off); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in expanded group 0, bucket 0. 
*/ + ok1(h.group_start == subhash + sizeof(struct tdb_used_record)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS + + TDB_SUBLEVEL_HASH_BITS * 2); + + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-05-readonly-open.c b/lib/tdb2/test/run-05-readonly-open.c new file mode 100644 index 0000000000..0f1a4343d8 --- /dev/null +++ b/lib/tdb2/test/run-05-readonly-open.c @@ -0,0 +1,88 @@ +#include <ccan/failtest/failtest_override.h> +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include <ccan/failtest/failtest.h> +#include "logging.h" +#include "failtest_helper.h" + +static bool failtest_suppress = false; + +/* Don't need to test everything here, just want expand testing. 
*/ +static enum failtest_result +suppress_failure(struct failtest_call *history, unsigned num) +{ + if (failtest_suppress) + return FAIL_DONT_FAIL; + return block_repeat_failures(history, num); +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4), d; + union tdb_attribute seed_attr; + unsigned int msgs = 0; + + failtest_init(argc, argv); + failtest_hook = suppress_failure; + failtest_exit_check = exit_check_log; + + seed_attr.base.attr = TDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 0; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-05-readonly-open.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr); + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + tdb_close(tdb); + + failtest_suppress = false; + tdb = tdb_open("run-05-readonly-open.tdb", flags[i], + O_RDONLY, 0600, &tap_log_attr); + if (!ok1(tdb)) + break; + ok1(tap_log_messages == msgs); + /* Fetch should succeed, stores should fail. */ + if (!ok1(tdb_fetch(tdb, key, &d) == 0)) + goto fail; + ok1(tdb_deq(d, data)); + free(d.dptr); + if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY) + == TDB_ERR_RDONLY)) + goto fail; + ok1(tap_log_messages == ++msgs); + if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) + == TDB_ERR_RDONLY)) + goto fail; + ok1(tap_log_messages == ++msgs); + failtest_suppress = true; + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + ok1(tap_log_messages == msgs); + /* SIGH: failtest bug, it doesn't save the tdb file because + * we have it read-only. If we go around again, it gets + * changed underneath us and things get screwy. 
*/ + if (failtest_has_failed()) + break; + } + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + tdb_close(tdb); + failtest_exit(exit_status()); +} diff --git a/lib/tdb2/test/run-10-simple-store.c b/lib/tdb2/test/run-10-simple-store.c new file mode 100644 index 0000000000..35c387a3be --- /dev/null +++ b/lib/tdb2/test/run-10-simple-store.c @@ -0,0 +1,76 @@ +#include <ccan/failtest/failtest_override.h> +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include <ccan/failtest/failtest.h> +#include "logging.h" +#include "failtest_helper.h" + +static bool failtest_suppress = false; + +/* Don't need to test everything here, just want expand testing. */ +static enum failtest_result +suppress_failure(struct failtest_call *history, unsigned num) +{ + if (failtest_suppress) + return FAIL_DONT_FAIL; + return block_repeat_failures(history, num); +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4); + + failtest_init(argc, argv); + failtest_hook = suppress_failure; + failtest_exit_check = exit_check_log; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-10-simple-store.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(tdb)) + break; + /* Modify should fail. 
*/ + failtest_suppress = false; + if (!ok1(tdb_store(tdb, key, data, TDB_MODIFY) + == TDB_ERR_NOEXIST)) + goto fail; + failtest_suppress = true; + ok1(tdb_check(tdb, NULL, NULL) == 0); + /* Insert should succeed. */ + failtest_suppress = false; + if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0)) + goto fail; + failtest_suppress = true; + ok1(tdb_check(tdb, NULL, NULL) == 0); + /* Second insert should fail. */ + failtest_suppress = false; + if (!ok1(tdb_store(tdb, key, data, TDB_INSERT) + == TDB_ERR_EXISTS)) + goto fail; + failtest_suppress = true; + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + } + ok1(tap_log_messages == 0); + failtest_exit(exit_status()); + +fail: + failtest_suppress = true; + tdb_close(tdb); + failtest_exit(exit_status()); +} diff --git a/lib/tdb2/test/run-11-simple-fetch.c b/lib/tdb2/test/run-11-simple-fetch.c new file mode 100644 index 0000000000..29b6bf0872 --- /dev/null +++ b/lib/tdb2/test/run-11-simple-fetch.c @@ -0,0 +1,76 @@ +#include <ccan/failtest/failtest_override.h> +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include <ccan/failtest/failtest.h> +#include "logging.h" +#include "failtest_helper.h" + +static bool failtest_suppress = false; + +/* Don't need to test everything here, just want fetch testing. 
*/ +static enum failtest_result +suppress_failure(struct failtest_call *history, unsigned num) +{ + if (failtest_suppress) + return FAIL_DONT_FAIL; + return block_repeat_failures(history, num); +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4); + + failtest_init(argc, argv); + failtest_hook = suppress_failure; + failtest_exit_check = exit_check_log; + + failtest_suppress = true; + plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-11-simple-fetch.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (tdb) { + struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */ + + /* fetch should fail. */ + failtest_suppress = false; + if (!ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST)) + goto fail; + failtest_suppress = true; + ok1(tdb_check(tdb, NULL, NULL) == 0); + /* Insert should succeed. */ + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + /* Fetch should now work. 
*/ + failtest_suppress = false; + if (!ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS)) + goto fail; + failtest_suppress = true; + ok1(tdb_deq(d, data)); + free(d.dptr); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + } + } + ok1(tap_log_messages == 0); + return exit_status(); + +fail: + failtest_suppress = true; + tdb_close(tdb); + failtest_exit(exit_status()); +} diff --git a/lib/tdb2/test/run-12-store.c b/lib/tdb2/test/run-12-store.c new file mode 100644 index 0000000000..ba2e4f8971 --- /dev/null +++ b/lib/tdb2/test/run-12-store.c @@ -0,0 +1,58 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +/* We use the same seed which we saw a failure on. */ +static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + *(uint64_t *)p); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + uint64_t seed = 16014841315512641303ULL; + union tdb_attribute fixed_hattr + = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = fixedhash, + .data = &seed } }; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = { (unsigned char *)&j, sizeof(j) }; + struct tdb_data data = { (unsigned char *)&j, sizeof(j) }; + + fixed_hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 500 * 3) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-12-store.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); + ok1(tdb); + if (!tdb) + continue; + + /* We seemed to lose some keys. + * Insert and check they're in there! 
*/ + for (j = 0; j < 500; j++) { + struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */ + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0); + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(tdb_deq(d, data)); + free(d.dptr); + } + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-13-delete.c b/lib/tdb2/test/run-13-delete.c new file mode 100644 index 0000000000..3b464d927e --- /dev/null +++ b/lib/tdb2/test/run-13-delete.c @@ -0,0 +1,207 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +/* We rig the hash so adjacent-numbered records always clash. */ +static uint64_t clash(const void *key, size_t len, uint64_t seed, void *priv) +{ + return ((uint64_t)*(const unsigned int *)key) + << (64 - TDB_TOPLEVEL_HASH_BITS - 1); +} + +/* We use the same seed which we saw a failure on. */ +static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p) +{ + return hash64_stable((const unsigned char *)key, len, + *(uint64_t *)p); +} + +static bool store_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data d, data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + tdb_fetch(tdb, key, &d); + if (!tdb_deq(d, data)) + return false; + free(d.dptr); + } + return true; +} + +static void test_val(struct tdb_context *tdb, uint64_t val) +{ + uint64_t v; + struct tdb_data key = { (unsigned char *)&v, sizeof(v) }; + struct tdb_data d, data = { (unsigned char *)&v, sizeof(v) }; + + /* Insert an entry, then delete it. */ + v = val; + /* Delete should fail. 
*/ + ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Insert should succeed. */ + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Delete should succeed. */ + ok1(tdb_delete(tdb, key) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Re-add it, then add collision. */ + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + v = val + 1; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Can find both? */ + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Delete second one. */ + v = val + 1; + ok1(tdb_delete(tdb, key) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Re-add */ + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Now, try deleting first one. */ + v = val; + ok1(tdb_delete(tdb, key) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Can still find second? */ + v = val + 1; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Now, this will be ideally placed. */ + v = val + 2; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* This will collide with both. */ + v = val; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + + /* We can still find them all, right? */ + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 1; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 2; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* And if we delete val + 1, that val + 2 should not move! 
*/ + v = val + 1; + ok1(tdb_delete(tdb, key) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + v = val; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + v = val + 2; + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == data.dsize); + free(d.dptr); + + /* Delete those two, so we are empty. */ + ok1(tdb_delete(tdb, key) == 0); + v = val; + ok1(tdb_delete(tdb, key) == 0); + + ok1(tdb_check(tdb, NULL, NULL) == 0); +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + uint64_t seed = 16014841315512641303ULL; + union tdb_attribute clash_hattr + = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = clash } }; + union tdb_attribute fixed_hattr + = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = fixedhash, + .data = &seed } }; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + /* These two values gave trouble before. */ + int vals[] = { 755, 837 }; + + clash_hattr.base.next = &tap_log_attr; + fixed_hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (39 * 3 + 5 + sizeof(vals)/sizeof(vals[0])*2) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-13-delete.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &clash_hattr); + ok1(tdb); + if (!tdb) + continue; + + /* Check start of hash table. */ + test_val(tdb, 0); + + /* Check end of hash table. */ + test_val(tdb, -1ULL); + + /* Check mixed bitpattern. */ + test_val(tdb, 0x123456789ABCDEF0ULL); + + ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0)); + tdb_close(tdb); + + /* Deleting these entries in the db gave problems. 
*/ + tdb = tdb_open("run-13-delete.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &fixed_hattr); + ok1(tdb); + if (!tdb) + continue; + + ok1(store_records(tdb)); + ok1(tdb_check(tdb, NULL, NULL) == 0); + for (j = 0; j < sizeof(vals)/sizeof(vals[0]); j++) { + struct tdb_data key; + + key.dptr = (unsigned char *)&vals[j]; + key.dsize = sizeof(vals[j]); + ok1(tdb_delete(tdb, key) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + } + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-14-exists.c b/lib/tdb2/test/run-14-exists.c new file mode 100644 index 0000000000..f264a6f2c9 --- /dev/null +++ b/lib/tdb2/test/run-14-exists.c @@ -0,0 +1,57 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static bool test_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (tdb_exists(tdb, key)) + return false; + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + if (!tdb_exists(tdb, key)) + return false; + } + + for (i = 0; i < 1000; i++) { + if (!tdb_exists(tdb, key)) + return false; + if (tdb_delete(tdb, key) != 0) + return false; + if (tdb_exists(tdb, key)) + return false; + } + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-14-exists.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if 
(ok1(tdb)) + ok1(test_records(tdb)); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-15-append.c b/lib/tdb2/test/run-15-append.c new file mode 100644 index 0000000000..d2f9ec6598 --- /dev/null +++ b/lib/tdb2/test/run-15-append.c @@ -0,0 +1,135 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <ccan/ilog/ilog.h> +#include "logging.h" + +#define MAX_SIZE 13100 +#define SIZE_STEP 131 + +static tdb_off_t tdb_offset(struct tdb_context *tdb, struct tdb_data key) +{ + tdb_off_t off; + struct tdb_used_record rec; + struct hash_info h; + + off = find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL); + if (TDB_OFF_IS_ERR(off)) + return 0; + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + return off; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j, moves; + struct tdb_context *tdb; + unsigned char *buffer; + tdb_off_t oldoff = 0, newoff; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data; + + buffer = malloc(MAX_SIZE); + for (i = 0; i < MAX_SIZE; i++) + buffer[i] = i; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * ((3 + MAX_SIZE/SIZE_STEP * 5) * 2 + 7) + + 1); + + /* Using tdb_store. 
*/ + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-append.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + moves = 0; + for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { + data.dptr = buffer; + data.dsize = j; + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == j); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + newoff = tdb_offset(tdb, key); + if (newoff != oldoff) + moves++; + oldoff = newoff; + } + ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0)); + /* We should increase by 50% each time... */ + ok(moves <= ilog64(j / SIZE_STEP)*2, "Moved %u times", moves); + tdb_close(tdb); + } + + /* Using tdb_append. */ + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + size_t prev_len = 0; + tdb = tdb_open("run-append.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + moves = 0; + for (j = 0; j < MAX_SIZE; j += SIZE_STEP) { + data.dptr = buffer + prev_len; + data.dsize = j - prev_len; + ok1(tdb_append(tdb, key, data) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == j); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + prev_len = data.dsize; + newoff = tdb_offset(tdb, key); + if (newoff != oldoff) + moves++; + oldoff = newoff; + } + ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0)); + /* We should increase by 50% each time... */ + ok(moves <= ilog64(j / SIZE_STEP)*2, "Moved %u times", moves); + tdb_close(tdb); + } + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-append.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + /* Huge initial store. 
*/ + data.dptr = buffer; + data.dsize = MAX_SIZE; + ok1(tdb_append(tdb, key, data) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == MAX_SIZE); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + ok1(!tdb->file || (tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0)); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + free(buffer); + return exit_status(); +} diff --git a/lib/tdb2/test/run-16-wipe_all.c b/lib/tdb2/test/run-16-wipe_all.c new file mode 100644 index 0000000000..d9c5128e0b --- /dev/null +++ b/lib/tdb2/test/run-16-wipe_all.c @@ -0,0 +1,50 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static bool add_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + } + return true; +} + + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-16-wipe_all.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (ok1(tdb)) { + struct tdb_data key; + ok1(add_records(tdb)); + ok1(tdb_wipe_all(tdb) == TDB_SUCCESS); + ok1(tdb_firstkey(tdb, &key) == TDB_ERR_NOEXIST); + tdb_close(tdb); + } + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git 
a/lib/tdb2/test/run-20-growhash.c b/lib/tdb2/test/run-20-growhash.c new file mode 100644 index 0000000000..22a88c4504 --- /dev/null +++ b/lib/tdb2/test/run-20-growhash.c @@ -0,0 +1,144 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static uint64_t myhash(const void *key, size_t len, uint64_t seed, void *priv) +{ + return *(const uint64_t *)key; +} + +static void add_bits(uint64_t *val, unsigned new, unsigned new_bits, + unsigned *done) +{ + *done += new_bits; + *val |= ((uint64_t)new << (64 - *done)); +} + +static uint64_t make_key(unsigned topgroup, unsigned topbucket, + unsigned subgroup1, unsigned subbucket1, + unsigned subgroup2, unsigned subbucket2) +{ + uint64_t key = 0; + unsigned done = 0; + + add_bits(&key, topgroup, TDB_TOPLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS, + &done); + add_bits(&key, topbucket, TDB_HASH_GROUP_BITS, &done); + add_bits(&key, subgroup1, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS, + &done); + add_bits(&key, subbucket1, TDB_HASH_GROUP_BITS, &done); + add_bits(&key, subgroup2, TDB_SUBLEVEL_HASH_BITS - TDB_HASH_GROUP_BITS, + &done); + add_bits(&key, subbucket2, TDB_HASH_GROUP_BITS, &done); + return key; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + uint64_t kdata; + struct tdb_used_record rec; + struct tdb_data key = { (unsigned char *)&kdata, sizeof(kdata) }; + struct tdb_data dbuf = { (unsigned char *)&kdata, sizeof(kdata) }; + union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = myhash } }; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT, + }; + + hattr.base.next = &tap_log_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (9 + (20 + 2 * ((1 
<< TDB_HASH_GROUP_BITS) - 2)) + * (1 << TDB_HASH_GROUP_BITS)) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct hash_info h; + + tdb = tdb_open("run-04-basichash.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(tdb); + if (!tdb) + continue; + + /* Fill a group. */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) { + kdata = make_key(0, j, 0, 0, 0, 0); + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + } + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Check first still exists. */ + kdata = make_key(0, 0, 0, 0, 0, 0); + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL) != 0); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have located space in group 0, bucket 0. */ + ok1(h.group_start == offsetof(struct tdb_header, hashtable)); + ok1(h.home_bucket == 0); + ok1(h.found_bucket == 0); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS); + /* Entire group should be full! */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) + ok1(h.group[j] != 0); + + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_RDLCK) == 0); + + /* Now, add one more to each should expand (that) bucket. */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) { + unsigned int k; + kdata = make_key(0, j, 0, 1, 0, 0); + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL)); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have moved to subhash */ + ok1(h.group_start >= sizeof(struct tdb_header)); + ok1(h.home_bucket == 1); + ok1(h.found_bucket == 1); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS + + TDB_SUBLEVEL_HASH_BITS); + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_RDLCK) == 0); + + /* Keep adding, make it expand again. 
*/ + for (k = 2; k < (1 << TDB_HASH_GROUP_BITS); k++) { + kdata = make_key(0, j, 0, k, 0, 0); + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + } + + /* This should tip it over to sub-sub-hash. */ + kdata = make_key(0, j, 0, 0, 0, 1); + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + ok1(find_and_lock(tdb, key, F_RDLCK, &h, &rec, NULL)); + /* Should have created correct hash. */ + ok1(h.h == tdb_hash(tdb, key.dptr, key.dsize)); + /* Should have moved to subhash */ + ok1(h.group_start >= sizeof(struct tdb_header)); + ok1(h.home_bucket == 1); + ok1(h.found_bucket == 1); + ok1(h.hash_used == TDB_TOPLEVEL_HASH_BITS + + TDB_SUBLEVEL_HASH_BITS + TDB_SUBLEVEL_HASH_BITS); + ok1(tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, + F_RDLCK) == 0); + } + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-21-parse_record.c b/lib/tdb2/test/run-21-parse_record.c new file mode 100644 index 0000000000..773cdff4e0 --- /dev/null +++ b/lib/tdb2/test/run-21-parse_record.c @@ -0,0 +1,70 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static enum TDB_ERROR parse(TDB_DATA key, TDB_DATA data, TDB_DATA *expected) +{ + if (!tdb_deq(data, *expected)) + return TDB_ERR_EINVAL; + return TDB_SUCCESS; +} + +static enum TDB_ERROR parse_err(TDB_DATA key, TDB_DATA data, void *unused) +{ + return 100; +} + +static bool test_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < 1000; i++) { + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + } + + for (i = 0; i < 1000; 
i++) { + if (tdb_parse_record(tdb, key, parse, &data) != TDB_SUCCESS) + return false; + } + + if (tdb_parse_record(tdb, key, parse, &data) != TDB_ERR_NOEXIST) + return false; + + /* Test error return from parse function. */ + i = 0; + if (tdb_parse_record(tdb, key, parse_err, NULL) != 100) + return false; + + return true; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 2 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-14-exists.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (ok1(tdb)) + ok1(test_records(tdb)); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-25-hashoverload.c b/lib/tdb2/test/run-25-hashoverload.c new file mode 100644 index 0000000000..83f549d6b2 --- /dev/null +++ b/lib/tdb2/test/run-25-hashoverload.c @@ -0,0 +1,121 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static uint64_t badhash(const void *key, size_t len, uint64_t seed, void *priv) +{ + return 0; +} + +static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p) +{ + if (p) + return tdb_delete(tdb, key); + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + struct tdb_data key = { (unsigned char *)&j, sizeof(j) }; + struct tdb_data dbuf = { (unsigned char *)&j, sizeof(j) }; + union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = badhash } }; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, 
TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT, + }; + + hattr.base.next = &tap_log_attr; + + plan_tests(6883); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */ + + tdb = tdb_open("run-25-hashoverload.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(tdb); + if (!tdb) + continue; + + /* Fill a group. */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) { + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + } + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Now store one last value: should form chain. */ + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Check we can find them all. */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS) + 1; j++) { + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); + } + + /* Now add a *lot* more. */ + for (j = (1 << TDB_HASH_GROUP_BITS) + 1; + j < (16 << TDB_HASH_GROUP_BITS); + j++) { + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); + } + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Traverse through them. */ + ok1(tdb_traverse(tdb, trav, NULL) == j); + + /* Empty the first chain-worth. */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) + ok1(tdb_delete(tdb, key) == 0); + + ok1(tdb_check(tdb, NULL, NULL) == 0); + + for (j = (1 << TDB_HASH_GROUP_BITS); + j < (16 << TDB_HASH_GROUP_BITS); + j++) { + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(d.dsize == sizeof(j)); + ok1(d.dptr != NULL); + ok1(d.dptr && memcmp(d.dptr, &j, d.dsize) == 0); + free(d.dptr); + } + + /* Traverse through them. 
*/ + ok1(tdb_traverse(tdb, trav, NULL) + == (15 << TDB_HASH_GROUP_BITS)); + + /* Re-add */ + for (j = 0; j < (1 << TDB_HASH_GROUP_BITS); j++) { + ok1(tdb_store(tdb, key, dbuf, TDB_INSERT) == 0); + } + ok1(tdb_check(tdb, NULL, NULL) == 0); + + /* Now try deleting as we go. */ + ok1(tdb_traverse(tdb, trav, trav) + == (16 << TDB_HASH_GROUP_BITS)); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_traverse(tdb, trav, NULL) == 0); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-30-exhaust-before-expand.c b/lib/tdb2/test/run-30-exhaust-before-expand.c new file mode 100644 index 0000000000..2386f85f26 --- /dev/null +++ b/lib/tdb2/test/run-30-exhaust-before-expand.c @@ -0,0 +1,79 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <err.h> +#include "logging.h" + +static bool empty_freetable(struct tdb_context *tdb) +{ + struct tdb_freetable ftab; + unsigned int i; + + /* Now, free table should be completely exhausted in zone 0 */ + if (tdb_read_convert(tdb, tdb->ftable_off, &ftab, sizeof(ftab)) != 0) + abort(); + + for (i = 0; i < sizeof(ftab.buckets)/sizeof(ftab.buckets[0]); i++) { + if (ftab.buckets[i]) + return false; + } + return true; +} + + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 9 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + TDB_DATA k; + uint64_t size; + bool was_empty = false; + + k.dptr = (void *)&j; + k.dsize = sizeof(j); + + tdb = tdb_open("run-30-exhaust-before-expand.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + 
ok1(tdb); + if (!tdb) + continue; + + ok1(empty_freetable(tdb)); + /* Need some hash lock for expand. */ + ok1(tdb_lock_hashes(tdb, 0, 1, F_WRLCK, TDB_LOCK_WAIT) == 0); + /* Create some free space. */ + ok1(tdb_expand(tdb, 1) == 0); + ok1(tdb_unlock_hashes(tdb, 0, 1, F_WRLCK) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(!empty_freetable(tdb)); + + size = tdb->file->map_size; + /* Insert minimal-length records until we expand. */ + for (j = 0; tdb->file->map_size == size; j++) { + was_empty = empty_freetable(tdb); + if (tdb_store(tdb, k, k, TDB_INSERT) != 0) + err(1, "Failed to store record %i", j); + } + + /* Would have been empty before expansion, but no longer. */ + ok1(was_empty); + ok1(!empty_freetable(tdb)); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-50-multiple-freelists.c b/lib/tdb2/test/run-50-multiple-freelists.c new file mode 100644 index 0000000000..7a48c3e0ee --- /dev/null +++ b/lib/tdb2/test/run-50-multiple-freelists.c @@ -0,0 +1,71 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include <ccan/tdb2/transaction.c> +#include "logging.h" +#include "layout.h" + +int main(int argc, char *argv[]) +{ + tdb_off_t off; + struct tdb_context *tdb; + struct tdb_layout *layout; + TDB_DATA key, data; + + plan_tests(11); + key = tdb_mkdata("Hello", 5); + data = tdb_mkdata("world", 5); + + /* Create a TDB with three free tables. */ + layout = new_tdb_layout(NULL); + tdb_layout_add_freetable(layout); + tdb_layout_add_freetable(layout); + tdb_layout_add_freetable(layout); + tdb_layout_add_free(layout, 80, 0); + /* Used record prevent coalescing. 
*/ + tdb_layout_add_used(layout, key, data, 6); + tdb_layout_add_free(layout, 160, 1); + key.dsize--; + tdb_layout_add_used(layout, key, data, 7); + tdb_layout_add_free(layout, 320, 2); + key.dsize--; + tdb_layout_add_used(layout, key, data, 8); + tdb_layout_add_free(layout, 40, 0); + tdb = tdb_layout_get(layout); + ok1(tdb_check(tdb, NULL, NULL) == 0); + + off = get_free(tdb, 0, 80 - sizeof(struct tdb_used_record), 0, + TDB_USED_MAGIC, 0); + ok1(off == layout->elem[3].base.off); + ok1(tdb->ftable_off == layout->elem[0].base.off); + + off = get_free(tdb, 0, 160 - sizeof(struct tdb_used_record), 0, + TDB_USED_MAGIC, 0); + ok1(off == layout->elem[5].base.off); + ok1(tdb->ftable_off == layout->elem[1].base.off); + + off = get_free(tdb, 0, 320 - sizeof(struct tdb_used_record), 0, + TDB_USED_MAGIC, 0); + ok1(off == layout->elem[7].base.off); + ok1(tdb->ftable_off == layout->elem[2].base.off); + + off = get_free(tdb, 0, 40 - sizeof(struct tdb_used_record), 0, + TDB_USED_MAGIC, 0); + ok1(off == layout->elem[9].base.off); + ok1(tdb->ftable_off == layout->elem[0].base.off); + + /* Now we fail. 
*/ + off = get_free(tdb, 0, 0, 1, TDB_USED_MAGIC, 0); + ok1(off == 0); + + tdb_close(tdb); + tdb_layout_free(layout); + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-55-transaction.c b/lib/tdb2/test/run-55-transaction.c new file mode 100644 index 0000000000..1650e40e1f --- /dev/null +++ b/lib/tdb2/test/run-55-transaction.c @@ -0,0 +1,75 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + unsigned char *buffer; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data; + + buffer = malloc(1000); + for (i = 0; i < 1000; i++) + buffer[i] = i; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 20 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-55-transaction.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + ok1(tdb_transaction_start(tdb) == 0); + data.dptr = buffer; + data.dsize = 1000; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + + /* Cancelling a transaction means no store */ + tdb_transaction_cancel(tdb); + ok1(tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_ERR_NOEXIST); + + /* Commit the transaction. 
*/ + ok1(tdb_transaction_start(tdb) == 0); + data.dptr = buffer; + data.dsize = 1000; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + ok1(tdb_transaction_commit(tdb) == 0); + ok1(tdb->file->allrecord_lock.count == 0 + && tdb->file->num_lockrecs == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_fetch(tdb, key, &data) == TDB_SUCCESS); + ok1(data.dsize == 1000); + ok1(memcmp(data.dptr, buffer, data.dsize) == 0); + free(data.dptr); + + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + free(buffer); + return exit_status(); +} diff --git a/lib/tdb2/test/run-56-open-during-transaction.c b/lib/tdb2/test/run-56-open-during-transaction.c new file mode 100644 index 0000000000..96107d637e --- /dev/null +++ b/lib/tdb2/test/run-56-open-during-transaction.c @@ -0,0 +1,175 @@ +#include "config.h" +#include <unistd.h> +#include "lock-tracking.h" + +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include <err.h> +#include "external-agent.h" +#include "logging.h" + +static struct agent *agent; +static bool opened; +static int errors = 0; +#define TEST_DBNAME "run-56-open-during-transaction.tdb" + +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +static bool is_same(const char *snapshot, const char *latest, 
off_t len) +{ + unsigned i; + + for (i = 0; i < len; i++) { + if (snapshot[i] != latest[i]) + return false; + } + return true; +} + +static bool compare_file(int fd, const char *snapshot, off_t snapshot_len) +{ + char *contents; + bool same; + + /* over-length read serves as length check. */ + contents = malloc(snapshot_len+1); + same = pread(fd, contents, snapshot_len+1, 0) == snapshot_len + && is_same(snapshot, contents, snapshot_len); + free(contents); + return same; +} + +static void check_file_intact(int fd) +{ + enum agent_return ret; + struct stat st; + char *contents; + + fstat(fd, &st); + contents = malloc(st.st_size); + if (pread(fd, contents, st.st_size, 0) != st.st_size) { + diag("Read fail"); + errors++; + return; + } + + /* Ask agent to open file. */ + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + + /* It's OK to open it, but it must not have changed! */ + if (!compare_file(fd, contents, st.st_size)) { + diag("Agent changed file after opening %s", + agent_return_name(ret)); + errors++; + } + + if (ret == SUCCESS) { + ret = external_agent_operation(agent, CLOSE, NULL); + if (ret != SUCCESS) { + diag("Agent failed to close tdb: %s", + agent_return_name(ret)); + errors++; + } + } else if (ret != WOULD_HAVE_BLOCKED) { + diag("Agent opening file gave %s", + agent_return_name(ret)); + errors++; + } + + free(contents); +} + +static void after_unlock(int fd) +{ + if (opened) + check_file_intact(fd); +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + if (opened) + check_file_intact(fd); + + return pwrite(fd, buf, count, offset); +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + if (opened) + check_file_intact(fd); + + return write(fd, buf, count); +} + +static int ftruncate_check(int fd, off_t length) +{ + if (opened) + check_file_intact(fd); + + return ftruncate(fd, length); + +} + +int main(int argc, char *argv[]) +{ + const int flags[] = { TDB_DEFAULT, + TDB_NOMMAP, + 
TDB_CONVERT, + TDB_CONVERT | TDB_NOMMAP }; + int i; + struct tdb_context *tdb; + TDB_DATA key, data; + + plan_tests(20); + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + unlock_callback = after_unlock; + for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) { + diag("Test with %s and %s\n", + (flags[i] & TDB_CONVERT) ? "CONVERT" : "DEFAULT", + (flags[i] & TDB_NOMMAP) ? "no mmap" : "mmap"); + unlink(TEST_DBNAME); + tdb = tdb_open(TEST_DBNAME, flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + + opened = true; + ok1(tdb_transaction_start(tdb) == 0); + key = tdb_mkdata("hi", strlen("hi")); + data = tdb_mkdata("world", strlen("world")); + + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_transaction_commit(tdb) == 0); + ok(!errors, "We had %u open errors", errors); + + opened = false; + tdb_close(tdb); + } + + return exit_status(); +} diff --git a/lib/tdb2/test/run-57-die-during-transaction.c b/lib/tdb2/test/run-57-die-during-transaction.c new file mode 100644 index 0000000000..84f01eb21a --- /dev/null +++ b/lib/tdb2/test/run-57-die-during-transaction.c @@ -0,0 +1,275 @@ +#include "config.h" +#include <unistd.h> +#include "lock-tracking.h" +#include <ccan/tap/tap.h> +#include <stdlib.h> +#include <assert.h> +static ssize_t pwrite_check(int fd, const void *buf, size_t count, off_t offset); +static ssize_t write_check(int fd, const void *buf, size_t count); +static int ftruncate_check(int fd, off_t length); + +#define pwrite pwrite_check +#define write write_check +#define fcntl fcntl_with_lockcheck +#define ftruncate ftruncate_check + +/* There's a malloc inside transaction_setup_recovery, and valgrind complains + * when we longjmp and leak it. 
*/ +#define MAX_ALLOCATIONS 200 +static void *allocated[MAX_ALLOCATIONS]; + +static void *malloc_noleak(size_t len) +{ + unsigned int i; + + for (i = 0; i < MAX_ALLOCATIONS; i++) + if (!allocated[i]) { + allocated[i] = malloc(len); + return allocated[i]; + } + diag("Too many allocations!"); + abort(); +} + +static void free_noleak(void *p) +{ + unsigned int i; + + /* We don't catch realloc, so don't care if we miss one. */ + for (i = 0; i < MAX_ALLOCATIONS; i++) { + if (allocated[i] == p) { + allocated[i] = NULL; + break; + } + } + free(p); +} + +static void free_all(void) +{ + unsigned int i; + + for (i = 0; i < MAX_ALLOCATIONS; i++) { + free(allocated[i]); + allocated[i] = NULL; + } +} + +#define malloc malloc_noleak +#define free free_noleak + +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#undef malloc +#undef free +#undef write +#undef pwrite +#undef fcntl +#undef ftruncate + +#include <stdbool.h> +#include <stdarg.h> +#include <err.h> +#include <setjmp.h> +#include "external-agent.h" +#include "logging.h" + +static bool in_transaction; +static int target, current; +static jmp_buf jmpbuf; +#define TEST_DBNAME "run-57-die-during-transaction.tdb" +#define KEY_STRING "helloworld" + +static void maybe_die(int fd) +{ + if (in_transaction && current++ == target) { + longjmp(jmpbuf, 1); + } +} + +static ssize_t pwrite_check(int fd, + const void *buf, size_t count, off_t offset) +{ + ssize_t ret; + + maybe_die(fd); + + ret = pwrite(fd, buf, count, offset); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static ssize_t write_check(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + maybe_die(fd); + + ret = write(fd, buf, count); + if (ret != count) + return ret; + + maybe_die(fd); + return ret; +} + +static int ftruncate_check(int fd, off_t length) 
+{ + int ret; + + maybe_die(fd); + + ret = ftruncate(fd, length); + + maybe_die(fd); + return ret; +} + +static bool test_death(enum operation op, struct agent *agent) +{ + struct tdb_context *tdb = NULL; + TDB_DATA key; + enum agent_return ret; + int needed_recovery = 0; + + current = target = 0; +reset: + unlink(TEST_DBNAME); + tdb = tdb_open(TEST_DBNAME, TDB_NOMMAP, + O_CREAT|O_TRUNC|O_RDWR, 0600, &tap_log_attr); + if (!tdb) { + diag("Failed opening TDB: %s", strerror(errno)); + return false; + } + + if (setjmp(jmpbuf) != 0) { + /* We're partway through. Simulate our death. */ + close(tdb->file->fd); + forget_locking(); + in_transaction = false; + + ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); + if (ret == SUCCESS) + needed_recovery++; + else if (ret != FAILED) { + diag("Step %u agent NEEDS_RECOVERY = %s", current, + agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, op, KEY_STRING); + if (ret != SUCCESS) { + diag("Step %u op %s failed = %s", current, + operation_name(op), + agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, NEEDS_RECOVERY, ""); + if (ret != FAILED) { + diag("Still needs recovery after step %u = %s", + current, agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, CHECK, ""); + if (ret != SUCCESS) { + diag("Step %u check failed = %s", current, + agent_return_name(ret)); + return false; + } + + ret = external_agent_operation(agent, CLOSE, ""); + if (ret != SUCCESS) { + diag("Step %u close failed = %s", current, + agent_return_name(ret)); + return false; + } + + /* Suppress logging as this tries to use closed fd. */ + suppress_logging = true; + suppress_lockcheck = true; + tdb_close(tdb); + suppress_logging = false; + suppress_lockcheck = false; + target++; + current = 0; + free_all(); + goto reset; + } + + /* Put key for agent to fetch. 
*/ + key = tdb_mkdata(KEY_STRING, strlen(KEY_STRING)); + if (tdb_store(tdb, key, key, TDB_INSERT) != 0) + return false; + + /* This is the key we insert in transaction. */ + key.dsize--; + + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + if (ret != SUCCESS) + errx(1, "Agent failed to open: %s", agent_return_name(ret)); + + ret = external_agent_operation(agent, FETCH, KEY_STRING); + if (ret != SUCCESS) + errx(1, "Agent failed find key: %s", agent_return_name(ret)); + + in_transaction = true; + if (tdb_transaction_start(tdb) != 0) + return false; + + if (tdb_store(tdb, key, key, TDB_INSERT) != 0) + return false; + + if (tdb_transaction_commit(tdb) != 0) + return false; + + in_transaction = false; + + /* We made it! */ + diag("Completed %u runs", current); + tdb_close(tdb); + ret = external_agent_operation(agent, CLOSE, ""); + if (ret != SUCCESS) { + diag("Step %u close failed = %s", current, + agent_return_name(ret)); + return false; + } + + ok1(needed_recovery); + ok1(locking_errors == 0); + ok1(forget_locking() == 0); + locking_errors = 0; + return true; +} + +int main(int argc, char *argv[]) +{ + enum operation ops[] = { FETCH, STORE, TRANSACTION_START }; + struct agent *agent; + int i; + + plan_tests(12); + unlock_callback = maybe_die; + + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + for (i = 0; i < sizeof(ops)/sizeof(ops[0]); i++) { + diag("Testing %s after death", operation_name(ops[i])); + ok1(test_death(ops[i], agent)); + } + + free_external_agent(agent); + return exit_status(); +} diff --git a/lib/tdb2/test/run-64-bit-tdb.c b/lib/tdb2/test/run-64-bit-tdb.c new file mode 100644 index 0000000000..78dadca016 --- /dev/null +++ b/lib/tdb2/test/run-64-bit-tdb.c @@ -0,0 +1,80 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> 
+#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + if (sizeof(off_t) <= 4) { + plan_tests(1); + pass("No 64 bit off_t"); + return exit_status(); + } + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 14); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + off_t old_size; + TDB_DATA k, d; + struct hash_info h; + struct tdb_used_record rec; + tdb_off_t off; + + tdb = tdb_open("run-64-bit-tdb.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + old_size = tdb->file->map_size; + + /* This makes a sparse file */ + ok1(ftruncate(tdb->file->fd, 0xFFFFFFF0) == 0); + ok1(add_free_record(tdb, old_size, 0xFFFFFFF0 - old_size, + TDB_LOCK_WAIT, false) == TDB_SUCCESS); + + /* Now add a little record past the 4G barrier. */ + ok1(tdb_expand_file(tdb, 100) == TDB_SUCCESS); + ok1(add_free_record(tdb, 0xFFFFFFF0, 100, TDB_LOCK_WAIT, false) + == TDB_SUCCESS); + + ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS); + + /* Test allocation path. */ + k = tdb_mkdata("key", 4); + d = tdb_mkdata("data", 5); + ok1(tdb_store(tdb, k, d, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS); + + /* Make sure it put it at end as we expected. 
*/ + off = find_and_lock(tdb, k, F_RDLCK, &h, &rec, NULL); + ok1(off >= 0xFFFFFFF0); + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + + ok1(tdb_fetch(tdb, k, &d) == 0); + ok1(d.dsize == 5); + ok1(strcmp((char *)d.dptr, "data") == 0); + free(d.dptr); + + ok1(tdb_delete(tdb, k) == 0); + ok1(tdb_check(tdb, NULL, NULL) == TDB_SUCCESS); + + tdb_close(tdb); + } + + /* We might get messages about mmap failing, so don't test + * tap_log_messages */ + return exit_status(); +} diff --git a/lib/tdb2/test/run-80-tdb_fd.c b/lib/tdb2/test/run-80-tdb_fd.c new file mode 100644 index 0000000000..e8b2fae2dd --- /dev/null +++ b/lib/tdb2/test/run-80-tdb_fd.c @@ -0,0 +1,35 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 3); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-new_database.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(tdb)) + continue; + + if (flags[i] & TDB_INTERNAL) + ok1(tdb_fd(tdb) == -1); + else + ok1(tdb_fd(tdb) > 2); + tdb_close(tdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/lib/tdb2/test/run-81-seqnum.c b/lib/tdb2/test/run-81-seqnum.c new file mode 100644 index 0000000000..6e8b2698b6 --- /dev/null +++ b/lib/tdb2/test/run-81-seqnum.c @@ -0,0 +1,71 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include 
<ccan/tdb2/traverse.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */ + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4); + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 15 + 4 * 13); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-new_database.tdb", flags[i]|TDB_SEQNUM, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(tdb)) + continue; + + ok1(tdb_get_seqnum(tdb) == 0); + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_get_seqnum(tdb) == 1); + /* Fetch doesn't change seqnum */ + if (ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS)) + free(d.dptr); + ok1(tdb_get_seqnum(tdb) == 1); + ok1(tdb_append(tdb, key, data) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 2); + + ok1(tdb_delete(tdb, key) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 3); + /* Empty append works */ + ok1(tdb_append(tdb, key, data) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 4); + + ok1(tdb_wipe_all(tdb) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 5); + + if (!(flags[i] & TDB_INTERNAL)) { + ok1(tdb_transaction_start(tdb) == TDB_SUCCESS); + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_get_seqnum(tdb) == 6); + ok1(tdb_append(tdb, key, data) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 7); + ok1(tdb_delete(tdb, key) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 8); + ok1(tdb_transaction_commit(tdb) == TDB_SUCCESS); + ok1(tdb_get_seqnum(tdb) == 8); + + ok1(tdb_transaction_start(tdb) == TDB_SUCCESS); + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_get_seqnum(tdb) == 9); + tdb_transaction_cancel(tdb); + ok1(tdb_get_seqnum(tdb) == 8); + } + tdb_close(tdb); + ok1(tap_log_messages == 0); + 
} + return exit_status(); +} diff --git a/lib/tdb2/test/run-82-lockattr.c b/lib/tdb2/test/run-82-lockattr.c new file mode 100644 index 0000000000..bfc2653222 --- /dev/null +++ b/lib/tdb2/test/run-82-lockattr.c @@ -0,0 +1,263 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *_err) +{ + int *lock_err = _err; + struct flock fl; + int ret; + + if (*lock_err) { + errno = *lock_err; + return -1; + } + + do { + fl.l_type = rw; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + + if (waitflag) + ret = fcntl(fd, F_SETLKW, &fl); + else + ret = fcntl(fd, F_SETLK, &fl); + } while (ret != 0 && errno == EINTR); + + return ret; +} + +static int myunlock(int fd, int rw, off_t off, off_t len, void *_err) +{ + int *lock_err = _err; + struct flock fl; + int ret; + + if (*lock_err) { + errno = *lock_err; + return -1; + } + + do { + fl.l_type = F_UNLCK; + fl.l_whence = SEEK_SET; + fl.l_start = off; + fl.l_len = len; + + ret = fcntl(fd, F_SETLKW, &fl); + } while (ret != 0 && errno == EINTR); + + return ret; +} + +static int trav_err; +static int trav(struct tdb_context *tdb, TDB_DATA k, TDB_DATA d, int *err) +{ + *err = trav_err; + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + union tdb_attribute lock_attr; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4); + int lock_err; + + lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK; + lock_attr.base.next = &tap_log_attr; + lock_attr.flock.lock = mylock; + lock_attr.flock.unlock = myunlock; + lock_attr.flock.data 
= &lock_err; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 80); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct tdb_data d; + + /* Nonblocking open; expect no error message. */ + lock_err = EAGAIN; + tdb = tdb_open("run-82-lockattr.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok(errno == lock_err, "Errno is %u", errno); + ok1(!tdb); + ok1(tap_log_messages == 0); + + lock_err = EINTR; + tdb = tdb_open("run-82-lockattr.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok(errno == lock_err, "Errno is %u", errno); + ok1(!tdb); + ok1(tap_log_messages == 0); + + /* Forced fail open. */ + lock_err = ENOMEM; + tdb = tdb_open("run-82-lockattr.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + ok1(errno == lock_err); + ok1(!tdb); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = 0; + tdb = tdb_open("run-82-lockattr.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &lock_attr); + if (!ok1(tdb)) + continue; + ok1(tap_log_messages == 0); + + /* Nonblocking store. */ + lock_err = EAGAIN; + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking fetch. 
*/ + lock_err = EAGAIN; + ok1(!tdb_exists(tdb, key)); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(!tdb_exists(tdb, key)); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(!tdb_exists(tdb, key)); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking delete. */ + lock_err = EAGAIN; + ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_delete(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking locks. */ + lock_err = EAGAIN; + ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_chainlock_read(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(tdb_lockall(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_lockall(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_lockall(tdb) == TDB_ERR_LOCK); + /* This actually does divide and conquer. 
*/ + ok1(tap_log_messages > 0); + tap_log_messages = 0; + + lock_err = EAGAIN; + ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_lockall_read(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages > 0); + tap_log_messages = 0; + + /* Nonblocking traverse; go nonblock partway through. */ + lock_err = 0; + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0); + trav_err = EAGAIN; + ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + trav_err = EINTR; + lock_err = 0; + ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + trav_err = ENOMEM; + lock_err = 0; + ok1(tdb_traverse(tdb, trav, &lock_err) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking transactions. */ + lock_err = EAGAIN; + ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = EINTR; + ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + lock_err = ENOMEM; + ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + tap_log_messages = 0; + + /* Nonblocking transaction prepare. */ + lock_err = 0; + ok1(tdb_transaction_start(tdb) == 0); + ok1(tdb_delete(tdb, key) == 0); + + lock_err = EAGAIN; + ok1(tdb_transaction_prepare_commit(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + lock_err = 0; + ok1(tdb_transaction_prepare_commit(tdb) == 0); + ok1(tdb_transaction_commit(tdb) == 0); + + /* And the transaction was committed, right? 
*/ + ok1(!tdb_exists(tdb, key)); + tdb_close(tdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/lib/tdb2/test/run-83-openhook.c b/lib/tdb2/test/run-83-openhook.c new file mode 100644 index 0000000000..320be7d4da --- /dev/null +++ b/lib/tdb2/test/run-83-openhook.c @@ -0,0 +1,98 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include <err.h> +#include "external-agent.h" +#include "logging.h" + +static enum TDB_ERROR clear_if_first(int fd, void *arg) +{ +/* We hold a lock offset 63 always, so we can tell if anyone is holding it. */ + struct flock fl; + + if (arg != clear_if_first) + return TDB_ERR_CORRUPT; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 63; + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it! 
*/ + diag("truncating file!"); + if (ftruncate(fd, 0) != 0) { + return TDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return TDB_ERR_IO; + } + return TDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + struct agent *agent; + union tdb_attribute cif; + struct tdb_data key = tdb_mkdata("key", 3); + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + + cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK; + cif.openhook.base.next = &tap_log_attr; + cif.openhook.fn = clear_if_first; + cif.openhook.data = clear_if_first; + + agent = prepare_external_agent(); + plan_tests(sizeof(flags) / sizeof(flags[0]) * 13); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + /* Create it */ + tdb = tdb_open("run-83-openhook.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); + ok1(tdb); + ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0); + tdb_close(tdb); + + /* Now, open with CIF, should clear it. */ + tdb = tdb_open("run-83-openhook.tdb", flags[i], + O_RDWR, 0, &cif); + ok1(tdb); + ok1(!tdb_exists(tdb, key)); + ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0); + + /* Agent should not clear it, since it's still open. */ + ok1(external_agent_operation(agent, OPEN_WITH_HOOK, + "run-83-openhook.tdb") == SUCCESS); + ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); + + /* Still exists for us too. */ + ok1(tdb_exists(tdb, key)); + + /* Close it, now agent should clear it. 
*/ + tdb_close(tdb); + + ok1(external_agent_operation(agent, OPEN_WITH_HOOK, + "run-83-openhook.tdb") == SUCCESS); + ok1(external_agent_operation(agent, FETCH, "key") == FAILED); + ok1(external_agent_operation(agent, CLOSE, "") == SUCCESS); + + ok1(tap_log_messages == 0); + } + + free_external_agent(agent); + return exit_status(); +} diff --git a/lib/tdb2/test/run-90-get-set-attributes.c b/lib/tdb2/test/run-90-get-set-attributes.c new file mode 100644 index 0000000000..159d8a01ea --- /dev/null +++ b/lib/tdb2/test/run-90-get-set-attributes.c @@ -0,0 +1,165 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static int mylock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *unused) +{ + return 0; +} + +static int myunlock(int fd, int rw, off_t off, off_t len, void *unused) +{ + return 0; +} + +static uint64_t hash_fn(const void *key, size_t len, uint64_t seed, + void *priv) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + union tdb_attribute seed_attr; + union tdb_attribute hash_attr; + union tdb_attribute lock_attr; + + hash_attr.base.attr = TDB_ATTRIBUTE_HASH; + hash_attr.base.next = &seed_attr; + hash_attr.hash.fn = hash_fn; + hash_attr.hash.data = &hash_attr; + + seed_attr.base.attr = TDB_ATTRIBUTE_SEED; + seed_attr.base.next = &lock_attr; + seed_attr.seed.seed = 100; + + lock_attr.base.attr = TDB_ATTRIBUTE_FLOCK; + lock_attr.base.next = &tap_log_attr; + lock_attr.flock.lock = mylock; + lock_attr.flock.unlock = myunlock; + lock_attr.flock.data = &lock_attr; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 50); + + for (i = 0; i < sizeof(flags) / 
sizeof(flags[0]); i++) { + union tdb_attribute attr; + + /* First open with no attributes. */ + tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, NULL); + ok1(tdb); + + /* Get log on no attributes will fail */ + attr.base.attr = TDB_ATTRIBUTE_LOG; + ok1(tdb_get_attribute(tdb, &attr) == TDB_ERR_NOEXIST); + /* These always work. */ + attr.base.attr = TDB_ATTRIBUTE_HASH; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_HASH); + ok1(attr.hash.fn == jenkins_hash); + attr.base.attr = TDB_ATTRIBUTE_FLOCK; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == tdb_fcntl_lock); + ok1(attr.flock.unlock == tdb_fcntl_unlock); + attr.base.attr = TDB_ATTRIBUTE_SEED; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_SEED); + /* This is possible, just astronomically unlikely. */ + ok1(attr.seed.seed != 0); + + /* Unset attributes. */ + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK); + + /* Set them. */ + ok1(tdb_set_attribute(tdb, &tap_log_attr) == 0); + ok1(tdb_set_attribute(tdb, &lock_attr) == 0); + /* These should fail. */ + ok1(tdb_set_attribute(tdb, &seed_attr) == TDB_ERR_EINVAL); + ok1(tap_log_messages == 1); + ok1(tdb_set_attribute(tdb, &hash_attr) == TDB_ERR_EINVAL); + ok1(tap_log_messages == 2); + tap_log_messages = 0; + + /* Getting them should work as expected. */ + attr.base.attr = TDB_ATTRIBUTE_LOG; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_LOG); + ok1(attr.log.fn == tap_log_attr.log.fn); + ok1(attr.log.data == tap_log_attr.log.data); + + attr.base.attr = TDB_ATTRIBUTE_FLOCK; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == mylock); + ok1(attr.flock.unlock == myunlock); + ok1(attr.flock.data == &lock_attr); + + /* Unset them again. 
*/ + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK); + ok1(tap_log_messages == 0); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG); + ok1(tap_log_messages == 0); + + tdb_close(tdb); + ok1(tap_log_messages == 0); + + /* Now open with all attributes. */ + tdb = tdb_open("run-90-get-set-attributes.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &hash_attr); + ok1(tdb); + + /* Get will succeed */ + attr.base.attr = TDB_ATTRIBUTE_LOG; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_LOG); + ok1(attr.log.fn == tap_log_attr.log.fn); + ok1(attr.log.data == tap_log_attr.log.data); + + attr.base.attr = TDB_ATTRIBUTE_HASH; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_HASH); + ok1(attr.hash.fn == hash_fn); + ok1(attr.hash.data == &hash_attr); + + attr.base.attr = TDB_ATTRIBUTE_FLOCK; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_FLOCK); + ok1(attr.flock.lock == mylock); + ok1(attr.flock.unlock == myunlock); + ok1(attr.flock.data == &lock_attr); + + attr.base.attr = TDB_ATTRIBUTE_SEED; + ok1(tdb_get_attribute(tdb, &attr) == 0); + ok1(attr.base.attr == TDB_ATTRIBUTE_SEED); + ok1(attr.seed.seed == seed_attr.seed.seed); + + /* Unset attributes. 
*/ + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_HASH); + ok1(tap_log_messages == 1); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_SEED); + ok1(tap_log_messages == 2); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_LOG); + ok1(tap_log_messages == 2); + tap_log_messages = 0; + + tdb_close(tdb); + + } + return exit_status(); +} diff --git a/lib/tdb2/test/run-91-get-stats.c b/lib/tdb2/test/run-91-get-stats.c new file mode 100644 index 0000000000..795dfd6602 --- /dev/null +++ b/lib/tdb2/test/run-91-get-stats.c @@ -0,0 +1,59 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 11); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + union tdb_attribute *attr; + struct tdb_data key = tdb_mkdata("key", 3); + + tdb = tdb_open("run-91-get-stats.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + ok1(tdb_store(tdb, key, key, TDB_REPLACE) == 0); + + /* Use malloc so valgrind will catch overruns. */ + attr = malloc(sizeof *attr); + attr->stats.base.attr = TDB_ATTRIBUTE_STATS; + attr->stats.size = sizeof(*attr); + + ok1(tdb_get_attribute(tdb, attr) == 0); + ok1(attr->stats.size == sizeof(*attr)); + ok1(attr->stats.allocs > 0); + ok1(attr->stats.expands > 0); + ok1(attr->stats.locks > 0); + free(attr); + + /* Try short one. 
*/ + attr = malloc(offsetof(struct tdb_attribute_stats, allocs) + + sizeof(attr->stats.allocs)); + attr->stats.base.attr = TDB_ATTRIBUTE_STATS; + attr->stats.size = offsetof(struct tdb_attribute_stats, allocs) + + sizeof(attr->stats.allocs); + ok1(tdb_get_attribute(tdb, attr) == 0); + ok1(attr->stats.size == sizeof(*attr)); + ok1(attr->stats.allocs > 0); + free(attr); + ok1(tap_log_messages == 0); + + tdb_close(tdb); + + } + return exit_status(); +} diff --git a/lib/tdb2/test/run-add-remove-flags.c b/lib/tdb2/test/run-add-remove-flags.c new file mode 100644 index 0000000000..1dc8463662 --- /dev/null +++ b/lib/tdb2/test/run-add-remove-flags.c @@ -0,0 +1,93 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(87); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-add-remove-flags.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + ok1(tdb_get_flags(tdb) == tdb->flags); + tap_log_messages = 0; + tdb_add_flag(tdb, TDB_NOLOCK); + if (flags[i] & TDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(tdb_get_flags(tdb) & TDB_NOLOCK); + } + + tap_log_messages = 0; + tdb_add_flag(tdb, TDB_NOMMAP); + if (flags[i] & TDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(tdb_get_flags(tdb) & TDB_NOMMAP); + ok1(tdb->file->map_ptr == NULL); + } + + tap_log_messages = 0; + tdb_add_flag(tdb, TDB_NOSYNC); + if (flags[i] & TDB_INTERNAL) + 
ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(tdb_get_flags(tdb) & TDB_NOSYNC); + } + + ok1(tdb_get_flags(tdb) == tdb->flags); + + tap_log_messages = 0; + tdb_remove_flag(tdb, TDB_NOLOCK); + if (flags[i] & TDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(tdb_get_flags(tdb) & TDB_NOLOCK)); + } + + tap_log_messages = 0; + tdb_remove_flag(tdb, TDB_NOMMAP); + if (flags[i] & TDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(tdb_get_flags(tdb) & TDB_NOMMAP)); + ok1(tdb->file->map_ptr != NULL); + } + + tap_log_messages = 0; + tdb_remove_flag(tdb, TDB_NOSYNC); + if (flags[i] & TDB_INTERNAL) + ok1(tap_log_messages == 1); + else { + ok1(tap_log_messages == 0); + ok1(!(tdb_get_flags(tdb) & TDB_NOSYNC)); + } + + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-check-callback.c b/lib/tdb2/test/run-check-callback.c new file mode 100644 index 0000000000..1e87436717 --- /dev/null +++ b/lib/tdb2/test/run-check-callback.c @@ -0,0 +1,90 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/open.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +#define NUM_RECORDS 1000 + +static bool store_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + return true; +} + +static enum TDB_ERROR check(struct tdb_data key, + struct tdb_data data, + bool *array) +{ + int val; + + if (key.dsize != sizeof(val)) { + diag("Wrong key size: %u\n", key.dsize); + return TDB_ERR_CORRUPT; + } + + if (key.dsize != 
data.dsize + || memcmp(key.dptr, data.dptr, sizeof(val)) != 0) { + diag("Key and data differ\n"); + return TDB_ERR_CORRUPT; + } + + memcpy(&val, key.dptr, sizeof(val)); + if (val >= NUM_RECORDS || val < 0) { + diag("check value %i\n", val); + return TDB_ERR_CORRUPT; + } + + if (array[val]) { + diag("Value %i already seen\n", val); + return TDB_ERR_CORRUPT; + } + + array[val] = true; + return TDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + bool array[NUM_RECORDS]; + + tdb = tdb_open("run-check-callback.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + ok1(store_records(tdb)); + for (j = 0; j < NUM_RECORDS; j++) + array[j] = false; + ok1(tdb_check(tdb, check, array) == TDB_SUCCESS); + for (j = 0; j < NUM_RECORDS; j++) + if (!array[j]) + break; + ok1(j == NUM_RECORDS); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-expand-in-transaction.c b/lib/tdb2/test/run-expand-in-transaction.c new file mode 100644 index 0000000000..49ba03c924 --- /dev/null +++ b/lib/tdb2/test/run-expand-in-transaction.c @@ -0,0 +1,45 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT, + TDB_CONVERT|TDB_NOSYNC, + TDB_NOMMAP|TDB_CONVERT|TDB_NOSYNC }; + struct tdb_data key = tdb_mkdata("key", 
3); + struct tdb_data data = tdb_mkdata("data", 4); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + size_t size; + tdb = tdb_open("run-expand-in-transaction.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + size = tdb->file->map_size; + ok1(tdb_transaction_start(tdb) == 0); + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb->file->map_size > size); + ok1(tdb_transaction_commit(tdb) == 0); + ok1(tdb->file->map_size > size); + ok1(tdb_check(tdb, NULL, NULL) == 0); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-features.c b/lib/tdb2/test/run-features.c new file mode 100644 index 0000000000..6d82dc308c --- /dev/null +++ b/lib/tdb2/test/run-features.c @@ -0,0 +1,70 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/summary.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = { (unsigned char *)&j, sizeof(j) }; + struct tdb_data data = { (unsigned char *)&j, sizeof(j) }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 8 + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + uint64_t features; + tdb = tdb_open("run-features.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + /* Put some stuff in there. */ + for (j = 0; j < 100; j++) { + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + fail("Storing in tdb"); + } + + /* Mess with features fields in hdr. 
*/ + features = (~TDB_FEATURE_MASK ^ 1); + ok1(tdb_write_convert(tdb, offsetof(struct tdb_header, + features_used), + &features, sizeof(features)) == 0); + ok1(tdb_write_convert(tdb, offsetof(struct tdb_header, + features_offered), + &features, sizeof(features)) == 0); + tdb_close(tdb); + + tdb = tdb_open("run-features.tdb", flags[i], O_RDWR, 0, + &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + /* Should not have changed features offered. */ + ok1(tdb_read_convert(tdb, offsetof(struct tdb_header, + features_offered), + &features, sizeof(features)) == 0); + ok1(features == (~TDB_FEATURE_MASK ^ 1)); + + /* Should have cleared unknown bits in features_used. */ + ok1(tdb_read_convert(tdb, offsetof(struct tdb_header, + features_used), + &features, sizeof(features)) == 0); + ok1(features == (1 & TDB_FEATURE_MASK)); + + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-firstkey-nextkey.c b/lib/tdb2/test/run-firstkey-nextkey.c new file mode 100644 index 0000000000..65a6090a96 --- /dev/null +++ b/lib/tdb2/test/run-firstkey-nextkey.c @@ -0,0 +1,162 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +#define NUM_RECORDS 1000 + +static bool store_records(struct tdb_context *tdb) +{ + int i; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + + for (i = 0; i < NUM_RECORDS; i++) + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) + return false; + return true; +} + +struct trav_data { + unsigned int records[NUM_RECORDS]; + unsigned int calls; +}; + +static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *p) +{ + struct trav_data *td = p; + int val; + + 
memcpy(&val, dbuf.dptr, dbuf.dsize); + td->records[td->calls++] = val; + return 0; +} + +/* Since tdb_nextkey frees dptr, we need to clone it. */ +static TDB_DATA dup_key(TDB_DATA key) +{ + void *p = malloc(key.dsize); + memcpy(p, key.dptr, key.dsize); + key.dptr = p; + return key; +} + +int main(int argc, char *argv[]) +{ + unsigned int i, j; + int num; + struct trav_data td; + TDB_DATA k; + struct tdb_context *tdb; + union tdb_attribute seed_attr; + enum TDB_ERROR ecode; + + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + seed_attr.base.attr = TDB_ATTRIBUTE_SEED; + seed_attr.base.next = &tap_log_attr; + seed_attr.seed.seed = 6334326220117065685ULL; + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (NUM_RECORDS*6 + (NUM_RECORDS-1)*3 + 22) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-traverse.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &seed_attr); + ok1(tdb); + if (!tdb) + continue; + + ok1(tdb_firstkey(tdb, &k) == TDB_ERR_NOEXIST); + + /* One entry... */ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 0; + ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0); + ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS); + ok1(k.dsize == sizeof(num)); + ok1(memcmp(k.dptr, &num, sizeof(num)) == 0); + ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST); + + /* Two entries. */ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 1; + ok1(tdb_store(tdb, k, k, TDB_INSERT) == 0); + ok1(tdb_firstkey(tdb, &k) == TDB_SUCCESS); + ok1(k.dsize == sizeof(num)); + memcpy(&num, k.dptr, sizeof(num)); + ok1(num == 0 || num == 1); + ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS); + ok1(k.dsize == sizeof(j)); + memcpy(&j, k.dptr, sizeof(j)); + ok1(j == 0 || j == 1); + ok1(j != num); + ok1(tdb_nextkey(tdb, &k) == TDB_ERR_NOEXIST); + + /* Clean up. 
*/ + k.dptr = (unsigned char *)&num; + k.dsize = sizeof(num); + num = 0; + ok1(tdb_delete(tdb, k) == 0); + num = 1; + ok1(tdb_delete(tdb, k) == 0); + + /* Now lots of records. */ + ok1(store_records(tdb)); + td.calls = 0; + + num = tdb_traverse(tdb, trav, &td); + ok1(num == NUM_RECORDS); + ok1(td.calls == NUM_RECORDS); + + /* Simple loop should match tdb_traverse */ + for (j = 0, ecode = tdb_firstkey(tdb, &k); j < td.calls; j++) { + int val; + + ok1(ecode == TDB_SUCCESS); + ok1(k.dsize == sizeof(val)); + memcpy(&val, k.dptr, k.dsize); + ok1(td.records[j] == val); + ecode = tdb_nextkey(tdb, &k); + } + + /* But arbitrary orderings should work too. */ + for (j = td.calls-1; j > 0; j--) { + k.dptr = (unsigned char *)&td.records[j-1]; + k.dsize = sizeof(td.records[j-1]); + k = dup_key(k); + ok1(tdb_nextkey(tdb, &k) == TDB_SUCCESS); + ok1(k.dsize == sizeof(td.records[j])); + ok1(memcmp(k.dptr, &td.records[j], k.dsize) == 0); + free(k.dptr); + } + + /* Even delete should work. */ + for (j = 0, ecode = tdb_firstkey(tdb, &k); + ecode != TDB_ERR_NOEXIST; + j++) { + ok1(ecode == TDB_SUCCESS); + ok1(k.dsize == 4); + ok1(tdb_delete(tdb, k) == 0); + ecode = tdb_nextkey(tdb, &k); + } + + diag("delete using first/nextkey gave %u of %u records", + j, NUM_RECORDS); + ok1(j == NUM_RECORDS); + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-fork-test.c b/lib/tdb2/test/run-fork-test.c new file mode 100644 index 0000000000..e9813e0a0f --- /dev/null +++ b/lib/tdb2/test/run-fork-test.c @@ -0,0 +1,180 @@ +/* Test forking while holding lock. + * + * There are only five ways to do this currently: + * (1) grab a tdb_chainlock, then fork. + * (2) grab a tdb_lockall, then fork. + * (3) grab a tdb_lockall_read, then fork. + * (4) start a transaction, then fork. + * (5) fork from inside a tdb_parse() callback. + * + * Note that we don't hold a lock across tdb_traverse callbacks, so + * that doesn't matter. 
+ */ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "logging.h" + +static enum TDB_ERROR fork_in_parse(TDB_DATA key, TDB_DATA data, + struct tdb_context *tdb) +{ + int status; + + if (fork() == 0) { + /* We expect this to fail. */ + if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK) + exit(1); + + if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK) + exit(1); + + if (tap_log_messages != 2) + exit(2); + + tdb_close(tdb); + if (tap_log_messages != 2) + exit(3); + exit(0); + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + return TDB_SUCCESS; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data = tdb_mkdata("data", 4); + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 14); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + int status; + + tap_log_messages = 0; + + tdb = tdb_open("run-fork-test.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(tdb)) + continue; + + /* Put a record in here. */ + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_SUCCESS); + + ok1(tdb_chainlock(tdb, key) == TDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. 
*/ + if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK) + return 1; + + if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + tdb_chainunlock(tdb, key); + if (tap_log_messages != 3) + return 3; + tdb_close(tdb); + if (tap_log_messages != 3) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + tdb_chainunlock(tdb, key); + + ok1(tdb_lockall(tdb) == TDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. */ + if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK) + return 1; + + if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + tdb_unlockall(tdb); + if (tap_log_messages != 2) + return 3; + tdb_close(tdb); + if (tap_log_messages != 2) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + tdb_unlockall(tdb); + + ok1(tdb_lockall_read(tdb) == TDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. */ + /* This would always fail anyway... */ + if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK) + return 1; + + if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + tdb_unlockall_read(tdb); + if (tap_log_messages != 2) + return 3; + tdb_close(tdb); + if (tap_log_messages != 2) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + tdb_unlockall_read(tdb); + + ok1(tdb_transaction_start(tdb) == TDB_SUCCESS); + /* If transactions is empty, noop "commit" succeeds. */ + ok1(tdb_delete(tdb, key) == TDB_SUCCESS); + if (fork() == 0) { + /* We expect this to fail. 
*/ + if (tdb_store(tdb, key, data, TDB_REPLACE) != TDB_ERR_LOCK) + return 1; + + if (tdb_fetch(tdb, key, &data) != TDB_ERR_LOCK) + return 1; + + if (tap_log_messages != 2) + return 2; + + if (tdb_transaction_commit(tdb) != TDB_ERR_LOCK) + return 3; + + tdb_close(tdb); + if (tap_log_messages < 3) + return 4; + return 0; + } + wait(&status); + ok1(WIFEXITED(status) && WEXITSTATUS(status) == 0); + tdb_transaction_cancel(tdb); + + ok1(tdb_parse_record(tdb, key, fork_in_parse, tdb) + == TDB_SUCCESS); + tdb_close(tdb); + ok1(tap_log_messages == 0); + } + return exit_status(); +} diff --git a/lib/tdb2/test/run-lockall.c b/lib/tdb2/test/run-lockall.c new file mode 100644 index 0000000000..4aedf59743 --- /dev/null +++ b/lib/tdb2/test/run-lockall.c @@ -0,0 +1,80 @@ +#include "config.h" +#include <unistd.h> +#include "lock-tracking.h" + +#define fcntl fcntl_with_lockcheck + +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdarg.h> +#include <err.h> +#include "external-agent.h" +#include "logging.h" + +#define TEST_DBNAME "run-lockall.tdb" + +#undef fcntl + +int main(int argc, char *argv[]) +{ + struct agent *agent; + const int flags[] = { TDB_DEFAULT, + TDB_NOMMAP, + TDB_CONVERT, + TDB_CONVERT | TDB_NOMMAP }; + int i; + + plan_tests(13 * sizeof(flags)/sizeof(flags[0]) + 1); + agent = prepare_external_agent(); + if (!agent) + err(1, "preparing agent"); + + for (i = 0; i < sizeof(flags)/sizeof(flags[0]); i++) { + enum agent_return ret; + struct tdb_context *tdb; + + tdb = tdb_open(TEST_DBNAME, flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + + ret = external_agent_operation(agent, OPEN, TEST_DBNAME); + ok1(ret == SUCCESS); + + ok1(tdb_lockall(tdb) == TDB_SUCCESS); + 
ok1(external_agent_operation(agent, STORE, "key") + == WOULD_HAVE_BLOCKED); + ok1(external_agent_operation(agent, FETCH, "key") + == WOULD_HAVE_BLOCKED); + /* Test nesting. */ + ok1(tdb_lockall(tdb) == TDB_SUCCESS); + tdb_unlockall(tdb); + tdb_unlockall(tdb); + + ok1(external_agent_operation(agent, STORE, "key") == SUCCESS); + + ok1(tdb_lockall_read(tdb) == TDB_SUCCESS); + ok1(external_agent_operation(agent, STORE, "key") + == WOULD_HAVE_BLOCKED); + ok1(external_agent_operation(agent, FETCH, "key") == SUCCESS); + ok1(tdb_lockall_read(tdb) == TDB_SUCCESS); + tdb_unlockall_read(tdb); + tdb_unlockall_read(tdb); + + ok1(external_agent_operation(agent, STORE, "key") == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, NULL) == SUCCESS); + tdb_close(tdb); + } + + free_external_agent(agent); + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-locktimeout.c b/lib/tdb2/test/run-locktimeout.c new file mode 100644 index 0000000000..bb5b5db29b --- /dev/null +++ b/lib/tdb2/test/run-locktimeout.c @@ -0,0 +1,192 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tdb2/check.c> +#include <ccan/tap/tap.h> +#include "logging.h" +#include "external-agent.h" + +#undef alarm +#define alarm fast_alarm + +/* Speed things up by doing things in milliseconds. */ +static unsigned int fast_alarm(unsigned int milli_seconds) +{ + struct itimerval it; + + it.it_interval.tv_sec = it.it_interval.tv_usec = 0; + it.it_value.tv_sec = milli_seconds / 1000; + it.it_value.tv_usec = milli_seconds * 1000; + setitimer(ITIMER_REAL, &it, NULL); + return 0; +} + +#define CatchSignal(sig, handler) signal((sig), (handler)) + +static void do_nothing(int signum) +{ +} + +/* This example code is taken from SAMBA, so try not to change it. 
*/ +static struct flock flock_struct; + +/* Return a value which is none of v1, v2 or v3. */ +static inline short int invalid_value(short int v1, short int v2, short int v3) +{ + short int try = (v1+v2+v3)^((v1+v2+v3) << 16); + while (try == v1 || try == v2 || try == v3) + try++; + return try; +} + +/* We invalidate in as many ways as we can, so the OS rejects it */ +static void invalidate_flock_struct(int signum) +{ + flock_struct.l_type = invalid_value(F_RDLCK, F_WRLCK, F_UNLCK); + flock_struct.l_whence = invalid_value(SEEK_SET, SEEK_CUR, SEEK_END); + flock_struct.l_start = -1; + /* A large negative. */ + flock_struct.l_len = (((off_t)1 << (sizeof(off_t)*CHAR_BIT - 1)) + 1); +} + +static int timeout_lock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *_timeout) +{ + int ret, saved_errno = errno; + unsigned int timeout = *(unsigned int *)_timeout; + + flock_struct.l_type = rw; + flock_struct.l_whence = SEEK_SET; + flock_struct.l_start = off; + flock_struct.l_len = len; + + CatchSignal(SIGALRM, invalidate_flock_struct); + alarm(timeout); + + for (;;) { + if (waitflag) + ret = fcntl(fd, F_SETLKW, &flock_struct); + else + ret = fcntl(fd, F_SETLK, &flock_struct); + + if (ret == 0) + break; + + /* Not signalled? Something else went wrong. */ + if (flock_struct.l_len == len) { + if (errno == EAGAIN || errno == EINTR) + continue; + saved_errno = errno; + break; + } else { + saved_errno = EINTR; + break; + } + } + + alarm(0); + errno = saved_errno; + return ret; +} + +static int tdb_chainlock_with_timeout_internal(struct tdb_context *tdb, + TDB_DATA key, + unsigned int timeout, + int rw_type) +{ + union tdb_attribute locking; + enum TDB_ERROR ecode; + + if (timeout) { + locking.base.attr = TDB_ATTRIBUTE_FLOCK; + ecode = tdb_get_attribute(tdb, &locking); + if (ecode != TDB_SUCCESS) + return ecode; + + /* Replace locking function with our own. 
*/ + locking.flock.data = &timeout; + locking.flock.lock = timeout_lock; + + ecode = tdb_set_attribute(tdb, &locking); + if (ecode != TDB_SUCCESS) + return ecode; + } + if (rw_type == F_RDLCK) + ecode = tdb_chainlock_read(tdb, key); + else + ecode = tdb_chainlock(tdb, key); + + if (timeout) { + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK); + } + return ecode; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + TDB_DATA key = tdb_mkdata("hello", 5); + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + struct agent *agent; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 15); + + agent = prepare_external_agent(); + + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + enum TDB_ERROR ecode; + tdb = tdb_open("run-locktimeout.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + if (!ok1(tdb)) + break; + + /* Simple cases: should succeed. */ + ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20, + F_RDLCK); + ok1(ecode == TDB_SUCCESS); + ok1(tap_log_messages == 0); + + tdb_chainunlock_read(tdb, key); + ok1(tap_log_messages == 0); + + ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20, + F_WRLCK); + ok1(ecode == TDB_SUCCESS); + ok1(tap_log_messages == 0); + + tdb_chainunlock(tdb, key); + ok1(tap_log_messages == 0); + + /* OK, get agent to start transaction, then we should time out. */ + ok1(external_agent_operation(agent, OPEN, "run-locktimeout.tdb") + == SUCCESS); + ok1(external_agent_operation(agent, TRANSACTION_START, "") + == SUCCESS); + ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20, + F_WRLCK); + ok1(ecode == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + /* Even if we get a different signal, should be fine. 
*/ + CatchSignal(SIGUSR1, do_nothing); + external_agent_operation(agent, SEND_SIGNAL, ""); + ecode = tdb_chainlock_with_timeout_internal(tdb, key, 20, + F_WRLCK); + ok1(ecode == TDB_ERR_LOCK); + ok1(tap_log_messages == 0); + + ok1(external_agent_operation(agent, TRANSACTION_COMMIT, "") + == SUCCESS); + ok1(external_agent_operation(agent, CLOSE, "") + == SUCCESS); + tdb_close(tdb); + } + free_external_agent(agent); + return exit_status(); +} diff --git a/lib/tdb2/test/run-missing-entries.c b/lib/tdb2/test/run-missing-entries.c new file mode 100644 index 0000000000..e99572f64c --- /dev/null +++ b/lib/tdb2/test/run-missing-entries.c @@ -0,0 +1,48 @@ +/* Another test revealed that we lost an entry. This reproduces it. */ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +#define NUM_RECORDS 1189 + +/* We use the same seed which we saw this failure on. 
*/ +static uint64_t failhash(const void *key, size_t len, uint64_t seed, void *p) +{ + seed = 699537674708983027ULL; + return hash64_stable((const unsigned char *)key, len, seed); +} + +int main(int argc, char *argv[]) +{ + int i; + struct tdb_context *tdb; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH }, + .fn = failhash } }; + + hattr.base.next = &tap_log_attr; + plan_tests(1 + 2 * NUM_RECORDS + 1); + + tdb = tdb_open("run-missing-entries.tdb", TDB_INTERNAL, + O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr); + ok1(tdb); + if (tdb) { + for (i = 0; i < NUM_RECORDS; i++) { + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + } + tdb_close(tdb); + } + + ok1(tap_log_messages == 0); + return exit_status(); +} diff --git a/lib/tdb2/test/run-open-multiple-times.c b/lib/tdb2/test/run-open-multiple-times.c new file mode 100644 index 0000000000..240828df16 --- /dev/null +++ b/lib/tdb2/test/run-open-multiple-times.c @@ -0,0 +1,84 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb, *tdb2; + struct tdb_data key = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data data = { (unsigned char *)&i, sizeof(i) }; + struct tdb_data d = { NULL, 0 }; /* Bogus GCC warning */ + int flags[] = { TDB_DEFAULT, TDB_NOMMAP, + TDB_CONVERT, TDB_NOMMAP|TDB_CONVERT }; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 28); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-open-multiple-times.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) 
+ continue; + tdb2 = tdb_open("run-open-multiple-times.tdb", flags[i], + O_RDWR|O_CREAT, 0600, &tap_log_attr); + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb_check(tdb2, NULL, NULL) == 0); + + /* Store in one, fetch in the other. */ + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == 0); + ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS); + ok1(tdb_deq(d, data)); + free(d.dptr); + + /* Vice versa, with delete. */ + ok1(tdb_delete(tdb2, key) == 0); + ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_NOEXIST); + + /* OK, now close first one, check second still good. */ + ok1(tdb_close(tdb) == 0); + + ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == 0); + ok1(tdb_fetch(tdb2, key, &d) == TDB_SUCCESS); + ok1(tdb_deq(d, data)); + free(d.dptr); + + /* Reopen */ + tdb = tdb_open("run-open-multiple-times.tdb", flags[i], + O_RDWR|O_CREAT, 0600, &tap_log_attr); + ok1(tdb); + + ok1(tdb_transaction_start(tdb2) == 0); + + /* Anything in the other one should fail. */ + ok1(tdb_fetch(tdb, key, &d) == TDB_ERR_LOCK); + ok1(tap_log_messages == 1); + ok1(tdb_store(tdb, key, data, TDB_REPLACE) == TDB_ERR_LOCK); + ok1(tap_log_messages == 2); + ok1(tdb_transaction_start(tdb) == TDB_ERR_LOCK); + ok1(tap_log_messages == 3); + ok1(tdb_chainlock(tdb, key) == TDB_ERR_LOCK); + ok1(tap_log_messages == 4); + + /* Transaction should work as normal. */ + ok1(tdb_store(tdb2, key, data, TDB_REPLACE) == TDB_SUCCESS); + + /* Now... try closing with locks held. 
*/ + ok1(tdb_close(tdb2) == 0); + + ok1(tdb_fetch(tdb, key, &d) == TDB_SUCCESS); + ok1(tdb_deq(d, data)); + free(d.dptr); + ok1(tdb_close(tdb) == 0); + ok1(tap_log_messages == 4); + tap_log_messages = 0; + } + + return exit_status(); +} diff --git a/lib/tdb2/test/run-record-expand.c b/lib/tdb2/test/run-record-expand.c new file mode 100644 index 0000000000..109a099278 --- /dev/null +++ b/lib/tdb2/test/run-record-expand.c @@ -0,0 +1,53 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +#define MAX_SIZE 10000 +#define SIZE_STEP 131 + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + struct tdb_data key = tdb_mkdata("key", 3); + struct tdb_data data; + + data.dptr = malloc(MAX_SIZE); + memset(data.dptr, 0x24, MAX_SIZE); + + plan_tests(sizeof(flags) / sizeof(flags[0]) + * (3 + (1 + (MAX_SIZE/SIZE_STEP)) * 2) + 1); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + tdb = tdb_open("run-record-expand.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + ok1(tdb); + if (!tdb) + continue; + + data.dsize = 0; + ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + for (data.dsize = 0; + data.dsize < MAX_SIZE; + data.dsize += SIZE_STEP) { + memset(data.dptr, data.dsize, data.dsize); + ok1(tdb_store(tdb, key, data, TDB_MODIFY) == 0); + ok1(tdb_check(tdb, NULL, NULL) == 0); + } + tdb_close(tdb); + } + ok1(tap_log_messages == 0); + free(data.dptr); + + return exit_status(); +} diff --git a/lib/tdb2/test/run-remap-in-read_traverse.c b/lib/tdb2/test/run-remap-in-read_traverse.c new file mode 100644 index 0000000000..d784ca3407 --- 
/dev/null +++ b/lib/tdb2/test/run-remap-in-read_traverse.c @@ -0,0 +1,65 @@ +/* We had a bug where we marked the tdb read-only for a tdb_traverse_read. + * If we then expanded the tdb, we would remap read-only, and later SEGV. */ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/traverse.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "external-agent.h" +#include "logging.h" + +static bool file_larger(int fd, tdb_len_t size) +{ + struct stat st; + + fstat(fd, &st); + return st.st_size != size; +} + +static unsigned add_records_to_grow(struct agent *agent, int fd, tdb_len_t size) +{ + unsigned int i; + + for (i = 0; !file_larger(fd, size); i++) { + char data[20]; + sprintf(data, "%i", i); + if (external_agent_operation(agent, STORE, data) != SUCCESS) + return 0; + } + diag("Added %u records to grow file", i); + return i; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct agent *agent; + struct tdb_context *tdb; + struct tdb_data d = tdb_mkdata("hello", 5); + const char filename[] = "run-remap-in-read_traverse.tdb"; + + plan_tests(4); + + agent = prepare_external_agent(); + + tdb = tdb_open(filename, TDB_DEFAULT, + O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr); + + ok1(external_agent_operation(agent, OPEN, filename) == SUCCESS); + i = add_records_to_grow(agent, tdb->file->fd, tdb->file->map_size); + + /* Do a traverse. */ + ok1(tdb_traverse(tdb, NULL, NULL) == i); + + /* Now store something! 
*/ + ok1(tdb_store(tdb, d, d, TDB_INSERT) == 0); + ok1(tap_log_messages == 0); + tdb_close(tdb); + free_external_agent(agent); + return exit_status(); +} diff --git a/lib/tdb2/test/run-seed.c b/lib/tdb2/test/run-seed.c new file mode 100644 index 0000000000..a9b370b6e5 --- /dev/null +++ b/lib/tdb2/test/run-seed.c @@ -0,0 +1,67 @@ +#include <ccan/tdb2/tdb.c> +#include <ccan/tdb2/open.c> +#include <ccan/tdb2/free.c> +#include <ccan/tdb2/lock.c> +#include <ccan/tdb2/io.c> +#include <ccan/tdb2/hash.c> +#include <ccan/tdb2/check.c> +#include <ccan/tdb2/transaction.c> +#include <ccan/tap/tap.h> +#include "logging.h" + +static int log_count = 0; + +/* Normally we get a log when setting random seed. */ +static void my_log_fn(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, void *priv) +{ + log_count++; +} + +static union tdb_attribute log_attr = { + .log = { .base = { .attr = TDB_ATTRIBUTE_LOG }, + .fn = my_log_fn } +}; + +int main(int argc, char *argv[]) +{ + unsigned int i; + struct tdb_context *tdb; + union tdb_attribute attr; + int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP, + TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT, + TDB_NOMMAP|TDB_CONVERT }; + + attr.seed.base.attr = TDB_ATTRIBUTE_SEED; + attr.seed.base.next = &log_attr; + attr.seed.seed = 42; + + plan_tests(sizeof(flags) / sizeof(flags[0]) * 4 + 4 * 3); + for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) { + struct tdb_header hdr; + int fd; + tdb = tdb_open("run-seed.tdb", flags[i], + O_RDWR|O_CREAT|O_TRUNC, 0600, &attr); + ok1(tdb); + if (!tdb) + continue; + ok1(tdb_check(tdb, NULL, NULL) == 0); + ok1(tdb->hash_seed == 42); + ok1(log_count == 0); + tdb_close(tdb); + + if (flags[i] & TDB_INTERNAL) + continue; + + fd = open("run-seed.tdb", O_RDONLY); + ok1(fd >= 0); + ok1(read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)); + if (flags[i] & TDB_CONVERT) + ok1(bswap_64(hdr.hash_seed) == 42); + else + ok1(hdr.hash_seed == 42); + close(fd); + } + return exit_status(); +} diff --git 
a/lib/tdb2/test/run-simple-delete.c b/lib/tdb2/test/run-simple-delete.c
new file mode 100644
index 0000000000..d06bf2d2bd
--- /dev/null
+++ b/lib/tdb2/test/run-simple-delete.c
@@ -0,0 +1,42 @@
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/open.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/hash.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tdb2/transaction.c>
+#include <ccan/tap/tap.h>
+#include "logging.h"
+
+/* For each flag combination: deleting a missing key fails, inserting it
+ * succeeds, deleting it then succeeds, and the tdb checks clean throughout. */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	struct tdb_context *tdb;
+	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
+			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
+			TDB_NOMMAP|TDB_CONVERT };
+	struct tdb_data key = tdb_mkdata("key", 3);
+	struct tdb_data data = tdb_mkdata("data", 4);
+
+	/* 7 checks per flag set, plus the final log-message check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 7 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		tdb = tdb_open("run-simple-delete.tdb", flags[i],
+			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(tdb);
+		if (tdb) {
+			/* Delete should fail. */
+			ok1(tdb_delete(tdb, key) == TDB_ERR_NOEXIST);
+			ok1(tdb_check(tdb, NULL, NULL) == 0);
+			/* Insert should succeed. */
+			ok1(tdb_store(tdb, key, data, TDB_INSERT) == 0);
+			ok1(tdb_check(tdb, NULL, NULL) == 0);
+			/* Delete should now work. */
+			ok1(tdb_delete(tdb, key) == 0);
+			ok1(tdb_check(tdb, NULL, NULL) == 0);
+			tdb_close(tdb);
+		}
+	}
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
diff --git a/lib/tdb2/test/run-summary.c b/lib/tdb2/test/run-summary.c
new file mode 100644
index 0000000000..c92e759373
--- /dev/null
+++ b/lib/tdb2/test/run-summary.c
@@ -0,0 +1,60 @@
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/open.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/hash.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tdb2/summary.c>
+#include <ccan/tdb2/transaction.c>
+#include <ccan/tap/tap.h>
+#include "logging.h"
+
+/* True if a summary string was produced and it contains @needle. */
+static bool summary_has(const char *summary, const char *needle)
+{
+	return summary != NULL && strstr(summary, needle) != NULL;
+}
+
+/* Stores 500 records with varying data sizes under each flag combination
+ * and checks the text produced by tdb_summary(), with and without
+ * histograms. */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j;
+	struct tdb_context *tdb;
+	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
+			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
+			TDB_NOMMAP|TDB_CONVERT };
+	/* Key and data both alias the loop counter j. */
+	struct tdb_data key = { (unsigned char *)&j, sizeof(j) };
+	struct tdb_data data = { (unsigned char *)&j, sizeof(j) };
+	char *summary;
+
+	/* Per flag set: 1 open check + 2 summary passes of 5 checks each. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * (1 + 2 * 5) + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		tdb = tdb_open("run-summary.tdb", flags[i],
+			       O_RDWR|O_CREAT|O_TRUNC, 0600, &tap_log_attr);
+		ok1(tdb);
+		if (!tdb)
+			continue;
+
+		/* Put some stuff in there. */
+		for (j = 0; j < 500; j++) {
+			/* Make sure padding varies so we get some graphs! */
+			data.dsize = j % (sizeof(j) + 1);
+			if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+				fail("Storing in tdb");
+		}
+
+		/* Two passes: j == 0 (no histograms), then with histograms. */
+		for (j = 0;
+		     j <= TDB_SUMMARY_HISTOGRAMS;
+		     j += TDB_SUMMARY_HISTOGRAMS) {
+			/* Start from NULL so a failed tdb_summary() becomes
+			 * failed checks rather than strstr()/free() on an
+			 * uninitialized pointer.
+			 * NOTE(review): assumes tdb_summary() leaves
+			 * *summary untouched on failure - confirm. */
+			summary = NULL;
+			ok1(tdb_summary(tdb, j, &summary) == TDB_SUCCESS);
+			ok1(summary_has(summary, "Number of records: 500\n"));
+			ok1(summary_has(summary, "Smallest/average/largest keys: 4/4/4\n"));
+			ok1(summary_has(summary, "Smallest/average/largest data: 0/2/4\n"));
+			if (j == TDB_SUMMARY_HISTOGRAMS)
+				ok1(summary_has(summary, "|")
+				    && summary_has(summary, "*"));
+			else
+				ok1(summary != NULL
+				    && !summary_has(summary, "|")
+				    && !summary_has(summary, "*"));
+			free(summary);
+		}
+		tdb_close(tdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
diff --git a/lib/tdb2/test/run-tdb_errorstr.c b/lib/tdb2/test/run-tdb_errorstr.c
new file mode 100644
index 0000000000..27bdfcd67c
--- /dev/null
+++ b/lib/tdb2/test/run-tdb_errorstr.c
@@ -0,0 +1,59 @@
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/open.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/hash.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tdb2/transaction.c>
+#include <ccan/tap/tap.h>
+
+/* Walks every error code from TDB_SUCCESS down to TDB_ERR_RDONLY and
+ * checks the string tdb_errorstr() returns for it, then checks the
+ * string for the first code past the end of that range. */
+int main(int argc, char *argv[])
+{
+	enum TDB_ERROR err;
+	/* One test per code in [TDB_ERR_RDONLY, TDB_SUCCESS], plus the
+	 * out-of-range check after the loop. */
+	plan_tests(TDB_ERR_RDONLY*-1 + 2);
+
+	for (err = TDB_SUCCESS; err >= TDB_ERR_RDONLY; err--) {
+		switch (err) {
+		case TDB_SUCCESS:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Success"));
+			break;
+		case TDB_ERR_IO:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "IO Error"));
+			break;
+		case TDB_ERR_LOCK:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Locking error"));
+			break;
+		case TDB_ERR_OOM:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Out of memory"));
+			break;
+		case TDB_ERR_EXISTS:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Record exists"));
+			break;
+		case TDB_ERR_EINVAL:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Invalid parameter"));
+			break;
+		case TDB_ERR_NOEXIST:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Record does not exist"));
+			break;
+		case TDB_ERR_RDONLY:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "write not permitted"));
+			break;
+		case TDB_ERR_CORRUPT:
+			ok1(!strcmp(tdb_errorstr(err),
+				    "Corrupt database"));
+			break;
+		}
+	}
+	/* err is now one below TDB_ERR_RDONLY. */
+	ok1(!strcmp(tdb_errorstr(err), "Invalid error code"));
+
+	return exit_status();
+}
diff --git a/lib/tdb2/test/run-traverse.c b/lib/tdb2/test/run-traverse.c
new file mode 100644
index 0000000000..f973d95d0f
--- /dev/null
+++ b/lib/tdb2/test/run-traverse.c
@@ -0,0 +1,211 @@
+#include <ccan/tdb2/tdb.c>
+#include <ccan/tdb2/open.c>
+#include <ccan/tdb2/free.c>
+#include <ccan/tdb2/lock.c>
+#include <ccan/tdb2/io.c>
+#include <ccan/tdb2/hash.c>
+#include <ccan/tdb2/check.c>
+#include <ccan/tdb2/traverse.c>
+#include <ccan/tdb2/transaction.c>
+#include <ccan/tap/tap.h>
+#include "logging.h"
+
+#define NUM_RECORDS 1000
+
+/* We use the same seed which we saw a failure on: the seed argument is
+ * ignored and the value pointed to by p is used instead. */
+static uint64_t fixedhash(const void *key, size_t len, uint64_t seed, void *p)
+{
+	return hash64_stable((const unsigned char *)key, len,
+			     *(uint64_t *)p);
+}
+
+/* Stores NUM_RECORDS records whose key and data are both the int i.
+ * Returns false as soon as one store fails. */
+static bool store_records(struct tdb_context *tdb)
+{
+	int i;
+	/* Both aliases of i: every store sees the current loop value. */
+	struct tdb_data key = { (unsigned char *)&i, sizeof(i) };
+	struct tdb_data data = { (unsigned char *)&i, sizeof(i) };
+
+	for (i = 0; i < NUM_RECORDS; i++)
+		if (tdb_store(tdb, key, data, TDB_REPLACE) != 0)
+			return false;
+	return true;
+}
+
+/* State shared between main() and the trav() callback. */
+struct trav_data {
+	unsigned int calls, call_limit;	/* callbacks so far / stop after */
+	int low, high;			/* smallest / largest value seen */
+	bool mismatch;			/* a record had key != data */
+	bool delete;			/* delete each record as visited */
+	enum TDB_ERROR delete_error;	/* result of the last tdb_delete */
+};
+
+/* Traverse callback: validates the record, tracks the value range and
+ * optionally deletes the record.  Returns -1 on a malformed record or a
+ * failed delete, 1 once call_limit callbacks have run, otherwise 0. */
+static int trav(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+		struct trav_data *td)
+{
+	int val;
+
+	td->calls++;
+	if (key.dsize != sizeof(val) || dbuf.dsize != sizeof(val)
+	    || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
+		td->mismatch = true;
+		return -1;
+	}
+	memcpy(&val, dbuf.dptr, dbuf.dsize);
+	if (val < td->low)
+		td->low = val;
+	if (val > td->high)
+		td->high = val;
+
+	if (td->delete) {
+		td->delete_error = tdb_delete(tdb, key);
+		if (td->delete_error != TDB_SUCCESS) {
+			return -1;
+		}
+	}
+
+	if (td->calls == td->call_limit)
+		return 1;
+	return 0;
+}
+
+/* State shared between main() and the trav_grow() callback. */
+struct trav_grow_data {
+	unsigned int calls;	/* callbacks so far */
+	unsigned int num_large;	/* records seen that were already grown */
+	bool mismatch;		/* a record did not start with its key */
+	enum TDB_ERROR error;	/* result of the last tdb_append */
+};
+
+/* Traverse callback that appends 128 zero bytes to every record it
+ * visits.  Returns -1 on a malformed record or a failed append, else 0. */
+static int trav_grow(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf,
+		     struct trav_grow_data *tgd)
+{
+	int val;
+	unsigned char buffer[128] = { 0 };
+
+	tgd->calls++;
+	if (key.dsize != sizeof(val) || dbuf.dsize < sizeof(val)
+	    || memcmp(key.dptr, dbuf.dptr, key.dsize) != 0) {
+		tgd->mismatch = true;
+		return -1;
+	}
+
+	if (dbuf.dsize > sizeof(val))
+		/* We must have seen this before! */
+		tgd->num_large++;
+
+	/* Make a big difference to the database. */
+	dbuf.dptr = buffer;
+	dbuf.dsize = sizeof(buffer);
+	tgd->error = tdb_append(tdb, key, dbuf);
+	if (tgd->error != TDB_SUCCESS) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Runs full, short, deleting and growing traverses under each flag
+ * combination, using a fixed hash seed. */
+int main(int argc, char *argv[])
+{
+	unsigned int i;
+	int num;
+	struct trav_data td;
+	struct trav_grow_data tgd;
+	struct tdb_context *tdb;
+	uint64_t seed = 16014841315512641303ULL;
+	int flags[] = { TDB_INTERNAL, TDB_DEFAULT, TDB_NOMMAP,
+			TDB_INTERNAL|TDB_CONVERT, TDB_CONVERT,
+			TDB_NOMMAP|TDB_CONVERT };
+	union tdb_attribute hattr = { .hash = { .base = { TDB_ATTRIBUTE_HASH },
+						.fn = fixedhash,
+						.data = &seed } };
+
+	hattr.base.next = &tap_log_attr;
+
+	/* 32 checks per flag set, plus the final log-message check. */
+	plan_tests(sizeof(flags) / sizeof(flags[0]) * 32 + 1);
+	for (i = 0; i < sizeof(flags) / sizeof(flags[0]); i++) {
+		tdb = tdb_open("run-traverse.tdb", flags[i],
+			       O_RDWR|O_CREAT|O_TRUNC, 0600, &hattr);
+		ok1(tdb);
+		if (!tdb)
+			continue;
+
+		ok1(tdb_traverse(tdb, NULL, NULL) == 0);
+
+		ok1(store_records(tdb));
+		num = tdb_traverse(tdb, NULL, NULL);
+		ok1(num == NUM_RECORDS);
+
+		/* Full traverse. */
+		td.calls = 0;
+		td.call_limit = UINT_MAX;
+		td.low = INT_MAX;
+		td.high = INT_MIN;
+		td.mismatch = false;
+		td.delete = false;
+
+		num = tdb_traverse(tdb, trav, &td);
+		ok1(num == NUM_RECORDS);
+		ok1(!td.mismatch);
+		ok1(td.calls == NUM_RECORDS);
+		ok1(td.low == 0);
+		ok1(td.high == NUM_RECORDS-1);
+
+		/* Short traverse. */
+		td.calls = 0;
+		td.call_limit = NUM_RECORDS / 2;
+		td.low = INT_MAX;
+		td.high = INT_MIN;
+		td.mismatch = false;
+		td.delete = false;
+
+		num = tdb_traverse(tdb, trav, &td);
+		ok1(num == NUM_RECORDS / 2);
+		ok1(!td.mismatch);
+		ok1(td.calls == NUM_RECORDS / 2);
+		ok1(td.low <= NUM_RECORDS / 2);
+		ok1(td.high > NUM_RECORDS / 2);
+		ok1(tdb_check(tdb, NULL, NULL) == 0);
+		ok1(tap_log_messages == 0);
+
+		/* Deleting traverse (delete everything). */
+		td.calls = 0;
+		td.call_limit = UINT_MAX;
+		td.low = INT_MAX;
+		td.high = INT_MIN;
+		td.mismatch = false;
+		td.delete = true;
+		td.delete_error = TDB_SUCCESS;
+		num = tdb_traverse(tdb, trav, &td);
+		ok1(num == NUM_RECORDS);
+		ok1(td.delete_error == TDB_SUCCESS);
+		ok1(!td.mismatch);
+		ok1(td.calls == NUM_RECORDS);
+		ok1(td.low == 0);
+		ok1(td.high == NUM_RECORDS - 1);
+		ok1(tdb_check(tdb, NULL, NULL) == 0);
+
+		/* Now it's empty! */
+		ok1(tdb_traverse(tdb, NULL, NULL) == 0);
+
+		/* Re-add. */
+		ok1(store_records(tdb));
+		ok1(tdb_traverse(tdb, NULL, NULL) == NUM_RECORDS);
+		ok1(tdb_check(tdb, NULL, NULL) == 0);
+
+		/* Grow.  This will cause us to be reshuffled. */
+		tgd.calls = 0;
+		tgd.num_large = 0;
+		tgd.mismatch = false;
+		tgd.error = TDB_SUCCESS;
+		ok1(tdb_traverse(tdb, trav_grow, &tgd) > 1);
+		ok1(tgd.error == 0);
+		ok1(!tgd.mismatch);
+		ok1(tdb_check(tdb, NULL, NULL) == 0);
+		ok1(tgd.num_large < tgd.calls);
+		diag("growing db: %u calls, %u repeats",
+		     tgd.calls, tgd.num_large);
+
+		tdb_close(tdb);
+	}
+
+	ok1(tap_log_messages == 0);
+	return exit_status();
+}
diff --git a/lib/tdb2/tools/Makefile b/lib/tdb2/tools/Makefile
new file mode 100644
index 0000000000..11188c3baf
--- /dev/null
+++ b/lib/tdb2/tools/Makefile
@@ -0,0 +1,16 @@
+OBJS:=../../tdb2.o ../../hash.o ../../tally.o
+CFLAGS:=-I../../.. -I.. -Wall -g -O3 #-g -pg
+LDFLAGS:=-L../../..
+
+default: tdb2torture tdb2tool tdb2dump tdb2restore mktdb2 speed growtdb-bench
+
+tdb2dump: tdb2dump.c $(OBJS)
+tdb2restore: tdb2restore.c $(OBJS)
+tdb2torture: tdb2torture.c $(OBJS)
+tdb2tool: tdb2tool.c $(OBJS)
+mktdb2: mktdb2.c $(OBJS)
+speed: speed.c $(OBJS)
+growtdb-bench: growtdb-bench.c $(OBJS)
+
+clean:
+	rm -f tdb2torture tdb2dump tdb2restore tdb2tool mktdb2 speed growtdb-bench
diff --git a/lib/tdb2/tools/growtdb-bench.c b/lib/tdb2/tools/growtdb-bench.c
new file mode 100644
index 0000000000..f7f6845a8a
--- /dev/null
+++ b/lib/tdb2/tools/growtdb-bench.c
@@ -0,0 +1,112 @@
+#include "tdb2.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/* Log callback: prints every tdb message to stderr, tagged with the
+ * database name. */
+static void logfn(struct tdb_context *tdb,
+		  enum tdb_log_level level,
+		  const char *message,
+		  void *data)
+{
+	fprintf(stderr, "tdb:%s:%s\n", tdb_name(tdb), message);
+}
+
+/* Usage: growtdb-bench <users> <groups>
+ *
+ * Creates <users> records plus a growing index record in one transaction,
+ * then for each of <groups> groups appends 32 bytes to every user record
+ * and to the group record.  After each phase it runs tdb_check() and
+ * prints this process's /proc/<pid>/statm. */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j, users, groups;
+	TDB_DATA idxkey, idxdata;
+	TDB_DATA k, d, gk;
+	char cmd[100];
+	struct tdb_context *tdb;
+	enum TDB_ERROR ecode;
+	union tdb_attribute log;
+
+	if (argc != 3) {
+		printf("Usage: growtdb-bench <users> <groups>\n");
+		exit(1);
+	}
+	users = atoi(argv[1]);
+	groups = atoi(argv[2]);
+
+	snprintf(cmd, sizeof(cmd), "cat /proc/%i/statm", (int)getpid());
+
+	log.base.attr = TDB_ATTRIBUTE_LOG;
+	log.base.next = NULL;
+	log.log.fn = logfn;
+
+	tdb = tdb_open("/tmp/growtdb.tdb", TDB_DEFAULT,
+		       O_RDWR|O_CREAT|O_TRUNC, 0600, &log);
+	/* Every call below needs a valid context. */
+	if (!tdb)
+		err(1, "Opening /tmp/growtdb.tdb");
+
+	idxkey.dptr = (unsigned char *)"User index";
+	idxkey.dsize = strlen("User index");
+	idxdata.dsize = 51;
+	idxdata.dptr = calloc(idxdata.dsize, 1);
+
+	/* Create users. */
+	k.dsize = 48;
+	k.dptr = calloc(k.dsize, 1);
+	d.dsize = 64;
+	d.dptr = calloc(d.dsize, 1);
+	if (!idxdata.dptr || !k.dptr || !d.dptr)
+		err(1, "Allocating record buffers");
+
+	tdb_transaction_start(tdb);
+	for (i = 0; i < users; i++) {
+		/* The user number forms the first bytes of the key. */
+		memcpy(k.dptr, &i, sizeof(i));
+		ecode = tdb_store(tdb, k, d, TDB_INSERT);
+		if (ecode != TDB_SUCCESS)
+			errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
+
+		/* This simulates a growing index record. */
+		ecode = tdb_append(tdb, idxkey, idxdata);
+		if (ecode != TDB_SUCCESS)
+			errx(1, "tdb append failed: %s", tdb_errorstr(ecode));
+	}
+	if ((ecode = tdb_transaction_commit(tdb)) != 0)
+		errx(1, "tdb commit1 failed: %s", tdb_errorstr(ecode));
+
+	if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
+		errx(1, "tdb_check failed after initial insert!");
+
+	system(cmd);
+
+	/* Now put them all in groups: add 32 bytes to each record for
+	 * a group. */
+	gk.dsize = 48;
+	/* Size the buffer from its own length, not from k's. */
+	gk.dptr = calloc(gk.dsize, 1);
+	if (!gk.dptr)
+		err(1, "Allocating group key");
+	/* The last byte set to 1 keeps group keys distinct from user keys. */
+	gk.dptr[gk.dsize-1] = 1;
+
+	d.dsize = 32;
+	for (i = 0; i < groups; i++) {
+		tdb_transaction_start(tdb);
+		/* Create the "group". */
+		memcpy(gk.dptr, &i, sizeof(i));
+		ecode = tdb_store(tdb, gk, d, TDB_INSERT);
+		if (ecode != TDB_SUCCESS)
+			errx(1, "tdb insert failed: %s", tdb_errorstr(ecode));
+
+		/* Now populate it. */
+		for (j = 0; j < users; j++) {
+			/* Append to the user. */
+			memcpy(k.dptr, &j, sizeof(j));
+			if ((ecode = tdb_append(tdb, k, d)) != 0)
+				errx(1, "tdb append failed: %s",
+				     tdb_errorstr(ecode));
+
+			/* Append to the group. */
+			if ((ecode = tdb_append(tdb, gk, d)) != 0)
+				errx(1, "tdb append failed: %s",
+				     tdb_errorstr(ecode));
+		}
+		if ((ecode = tdb_transaction_commit(tdb)) != 0)
+			errx(1, "tdb commit2 failed: %s", tdb_errorstr(ecode));
+		if ((ecode = tdb_check(tdb, NULL, NULL)) != 0)
+			errx(1, "tdb_check failed after iteration %u!", i);
+		system(cmd);
+	}
+
+	return 0;
+}
diff --git a/lib/tdb2/tools/mktdb2.c b/lib/tdb2/tools/mktdb2.c
new file mode 100644
index 0000000000..c8c280349e
--- /dev/null
+++ b/lib/tdb2/tools/mktdb2.c
@@ -0,0 +1,29 @@
+#include "tdb2.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <err.h>
+
+/* Usage: mktdb <tdbfile> <numrecords>
+ *
+ * Creates (or truncates) <tdbfile> and inserts <numrecords> records whose
+ * key and data are both the record number. */
+int main(int argc, char *argv[])
+{
+	unsigned int i, num_recs;
+	struct tdb_context *tdb;
+	enum TDB_ERROR ecode;
+
+	if (argc != 3 || (num_recs = atoi(argv[2])) == 0)
+		errx(1, "Usage: mktdb <tdbfile> <numrecords>");
+
+	tdb = tdb_open(argv[1], TDB_DEFAULT, O_CREAT|O_TRUNC|O_RDWR, 0600,NULL);
+	if (!tdb)
+		err(1, "Opening %s", argv[1]);
+
+	for (i = 0; i < num_recs; i++) {
+		TDB_DATA d;
+
+		d.dptr = (void *)&i;
+		d.dsize = sizeof(i);
+		/* tdb_store reports failure through its return value, so
+		 * print that rather than whatever errno happens to hold. */
+		ecode = tdb_store(tdb, d, d, TDB_INSERT);
+		if (ecode != TDB_SUCCESS)
+			errx(1, "Failed to store record %u: %s",
+			     i, tdb_errorstr(ecode));
+	}
+	if (tdb_close(tdb) != 0)
+		errx(1, "Closing %s", argv[1]);
+	printf("Done\n");
+	return 0;
+}
diff --git a/lib/tdb2/tools/speed.c b/lib/tdb2/tools/speed.c
new file mode 100644
index 0000000000..3222465a71
--- /dev/null
+++ b/lib/tdb2/tools/speed.c
@@ -0,0 +1,440 @@
+/* Simple speed test for TDB */
+#include <err.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "tdb2.h"
+
+/* Nanoseconds per operation */
+static size_t normalize(const struct timeval *start,
+			const struct timeval *stop,
+			unsigned int num)
+{
+	struct timeval diff;
+
+	/* Nothing was timed; avoid dividing by zero. */
+	if (num == 0)
+		return 0;
+
+	timersub(stop, start, &diff);
+
+	/* Floating point is more accurate here: convert before multiplying
+	 * so the microsecond total cannot overflow a narrow tv_sec type. */
+	return ((double)diff.tv_sec * 1000000 + diff.tv_usec)
+		/ num * 1000;
+}
+
+/* Size in bytes of /tmp/speed.tdb.  If the file cannot be stat()ed the
+ * result is (size_t)-1. */
+static size_t file_size(void)
+{
+	struct stat st;
+
+	if (stat("/tmp/speed.tdb", &st) != 0)
+		return -1;
+	return st.st_size;
+}
+
+/* Traverse callback: adds the int at the start of each record's data to
+ * the int total pointed to by p. */
+static int count_record(struct tdb_context *tdb,
+			TDB_DATA key, TDB_DATA data, void *p)
+{
+	int *total = p;
+	*total += *(int *)data.dptr;
+	return 0;
+}
+
+/* Prints the histogram summary of tdb, exiting if it cannot be built. */
+static void print_summary(struct tdb_context *tdb)
+{
+	char *sumstr = NULL;
+	enum TDB_ERROR ecode;
+
+	ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &sumstr);
+	/* Never hand a NULL string to printf("%s"). */
+	if (ecode != TDB_SUCCESS || !sumstr)
+		errx(1, "tdb_summary failed: %s", tdb_errorstr(ecode));
+	printf("%s\n", sumstr);
+	free(sumstr);
+}
+
+/* Prints every counter of the TDB_ATTRIBUTE_STATS attribute of *tdb, then
+ * closes the database and reopens /tmp/speed.tdb with the same flags and
+ * attributes so that the next stage starts from fresh counters. */
+static void dump_and_clear_stats(struct tdb_context **tdb,
+				 int flags,
+				 union tdb_attribute *attr)
+{
+	union tdb_attribute stats;
+	enum TDB_ERROR ecode;
+
+	stats.base.attr = TDB_ATTRIBUTE_STATS;
+	stats.stats.size = sizeof(stats.stats);
+	ecode = tdb_get_attribute(*tdb, &stats);
+	if (ecode != TDB_SUCCESS)
+		errx(1, "Getting stats: %s", tdb_errorstr(ecode));
+
+	printf("allocs = %llu\n",
+	       (unsigned long long)stats.stats.allocs);
+	printf(" alloc_subhash = %llu\n",
+	       (unsigned long long)stats.stats.alloc_subhash);
+	printf(" alloc_chain = %llu\n",
+	       (unsigned long long)stats.stats.alloc_chain);
+	printf(" alloc_bucket_exact = %llu\n",
+	       (unsigned long long)stats.stats.alloc_bucket_exact);
+	printf(" alloc_bucket_max = %llu\n",
+	       (unsigned long long)stats.stats.alloc_bucket_max);
+	printf(" alloc_leftover = %llu\n",
+	       (unsigned long long)stats.stats.alloc_leftover);
+	printf(" alloc_coalesce_tried = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_tried);
+	printf(" alloc_coalesce_iterate_clash = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_iterate_clash);
+	printf(" alloc_coalesce_lockfail = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_lockfail);
+	printf(" alloc_coalesce_race = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_race);
+	printf(" alloc_coalesce_succeeded = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_succeeded);
+	printf(" alloc_coalesce_num_merged = %llu\n",
+	       (unsigned long long)stats.stats.alloc_coalesce_num_merged);
+	printf("compares = %llu\n",
+	       (unsigned long long)stats.stats.compares);
+	printf(" compare_wrong_bucket = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_bucket);
+	printf(" compare_wrong_offsetbits = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_offsetbits);
+	printf(" compare_wrong_keylen = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_keylen);
+	printf(" compare_wrong_rechash = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_rechash);
+	printf(" compare_wrong_keycmp = %llu\n",
+	       (unsigned long long)stats.stats.compare_wrong_keycmp);
+	printf("transactions = %llu\n",
+	       (unsigned long long)stats.stats.transactions);
+	printf(" transaction_cancel = %llu\n",
+	       (unsigned long long)stats.stats.transaction_cancel);
+	printf(" transaction_nest = %llu\n",
+	       (unsigned long long)stats.stats.transaction_nest);
+	printf(" transaction_expand_file = %llu\n",
+	       (unsigned long long)stats.stats.transaction_expand_file);
+	printf(" transaction_read_direct = %llu\n",
+	       (unsigned long long)stats.stats.transaction_read_direct);
+	printf(" transaction_read_direct_fail = %llu\n",
+	       (unsigned long long)stats.stats.transaction_read_direct_fail);
+	printf(" transaction_write_direct = %llu\n",
+	       (unsigned long long)stats.stats.transaction_write_direct);
+	printf(" transaction_write_direct_fail = %llu\n",
+	       (unsigned long long)stats.stats.transaction_write_direct_fail);
+	printf("expands = %llu\n",
+	       (unsigned long long)stats.stats.expands);
+	printf("frees = %llu\n",
+	       (unsigned long long)stats.stats.frees);
+	printf("locks = %llu\n",
+	       (unsigned long long)stats.stats.locks);
+	printf(" lock_lowlevel = %llu\n",
+	       (unsigned long long)stats.stats.lock_lowlevel);
+	printf(" lock_nonblock = %llu\n",
+	       (unsigned long long)stats.stats.lock_nonblock);
+	printf(" lock_nonblock_fail = %llu\n",
+	       (unsigned long long)stats.stats.lock_nonblock_fail);
+
+	/* Now clear. */
+	tdb_close(*tdb);
+	*tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR, 0, attr);
+	/* The caller keeps using *tdb, so a failed reopen must be fatal. */
+	if (!*tdb)
+		err(1, "Reopening /tmp/speed.tdb");
+}
+
+/* Log callback: writes each message on its own line to stderr. */
+static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level,
+		    const char *message, void *data)
+{
+	fputs(message, stderr);
+	putc('\n', stderr);
+}
+
+/* Usage: speed [--internal] [--transaction] [--no-sync] [--summary]
+ *              [--stats] [<num> [<stopat>]]
+ *
+ * The options are only recognised in the order shown.  Runs a fixed
+ * sequence of timed stages over <num> records (default 1000) and exits
+ * after stage number <stopat> if one is given. */
+int main(int argc, char *argv[])
+{
+	unsigned int i, j, num = 1000, stage = 0, stopat = -1;
+	int flags = TDB_DEFAULT;
+	bool transaction = false, summary = false;
+	TDB_DATA key, data;
+	struct tdb_context *tdb;
+	struct timeval start, stop;
+	union tdb_attribute seed, log;
+	bool do_stats = false;
+	enum TDB_ERROR ecode;
+
+	/* Try to keep benchmarks even. */
+	seed.base.attr = TDB_ATTRIBUTE_SEED;
+	seed.base.next = NULL;
+	seed.seed.seed = 0;
+
+	log.base.attr = TDB_ATTRIBUTE_LOG;
+	log.base.next = &seed;
+	log.log.fn = tdb_log;
+
+	if (argv[1] && strcmp(argv[1], "--internal") == 0) {
+		flags = TDB_INTERNAL;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--transaction") == 0) {
+		transaction = true;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--no-sync") == 0) {
+		flags |= TDB_NOSYNC;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--summary") == 0) {
+		summary = true;
+		argc--;
+		argv++;
+	}
+	if (argv[1] && strcmp(argv[1], "--stats") == 0) {
+		do_stats = true;
+		argc--;
+		argv++;
+	}
+
+	tdb = tdb_open("/tmp/speed.tdb", flags, O_RDWR|O_CREAT|O_TRUNC,
+		       0600, &log);
+	if (!tdb)
+		err(1, "Opening /tmp/speed.tdb");
+
+	/* Key and data both alias i: each operation below acts on the
+	 * record numbered by the current value of i. */
+	key.dptr = (void *)&i;
+	key.dsize = sizeof(i);
+	data = key;
+
+	if (argv[1]) {
+		num = atoi(argv[1]);
+		argv++;
+		argc--;
+	}
+
+	if (argv[1]) {
+		stopat = atoi(argv[1]);
+		argv++;
+		argc--;
+	}
+
+	/* Add 1000 records. */
+	printf("Adding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++)
+		if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+
+	if (++stage == stopat)
+		exit(0);
+
+	/* Finding 1000 records. */
+	printf("Finding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++) {
+		struct tdb_data dbuf;
+		if ((ecode = tdb_fetch(tdb, key, &dbuf)) != TDB_SUCCESS
+		    || *(int *)dbuf.dptr != i) {
+			errx(1, "Fetching key %u in tdb gave %u",
+			     i, ecode ? ecode : *(int *)dbuf.dptr);
+		}
+		/* The fetched buffer belongs to us; don't leak one per key. */
+		free(dbuf.dptr);
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Missing 1000 records. */
+	printf("Missing %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (i = num; i < num*2; i++) {
+		struct tdb_data dbuf;
+		ecode = tdb_fetch(tdb, key, &dbuf);
+		if (ecode != TDB_ERR_NOEXIST)
+			errx(1, "Fetching key %u in tdb gave %s",
+			     i, tdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Traverse 1000 records. */
+	printf("Traversing %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	i = 0;
+	gettimeofday(&start, NULL);
+	if (tdb_traverse(tdb, count_record, &i) != num)
+		errx(1, "Traverse returned wrong number of records");
+	/* Sum of 0..num-1 is num*(num-1)/2.  Compute it in a wider type:
+	 * halving num first is wrong for every odd num. */
+	if (i != (unsigned int)((unsigned long long)num * (num - 1) / 2))
+		errx(1, "Traverse tallied to %u", i);
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Delete 1000 records (not in order). */
+	printf("Deleting %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 100003) % num;
+		if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
+			errx(1, "Deleting key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Re-add 1000 records (not in order). */
+	printf("Re-adding %u records: ", num); fflush(stdout);
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 100003) % num;
+		if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	/* Append 1000 records. */
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	printf("Appending %u records: ", num); fflush(stdout);
+	gettimeofday(&start, NULL);
+	for (i = 0; i < num; i++)
+		if ((ecode = tdb_append(tdb, key, data)) != TDB_SUCCESS)
+			errx(1, "Appending key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	/* NOTE(review): unlike every other stage, stats are not dumped
+	 * here - confirm whether that is intentional. */
+	if (++stage == stopat)
+		exit(0);
+
+	/* Churn 1000 records: not in order! */
+	if (transaction && (ecode = tdb_transaction_start(tdb)))
+		errx(1, "starting transaction: %s", tdb_errorstr(ecode));
+	printf("Churning %u records: ", num); fflush(stdout);
+	gettimeofday(&start, NULL);
+	for (j = 0; j < num; j++) {
+		i = (j + 1000019) % num;
+		if ((ecode = tdb_delete(tdb, key)) != TDB_SUCCESS)
+			errx(1, "Deleting key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+		/* Replace it with a key from the next block of num keys. */
+		i += num;
+		if ((ecode = tdb_store(tdb, key, data, TDB_INSERT)) != 0)
+			errx(1, "Inserting key %u in tdb: %s",
+			     i, tdb_errorstr(ecode));
+	}
+	gettimeofday(&stop, NULL);
+	if (transaction && (ecode = tdb_transaction_commit(tdb)))
+		errx(1, "committing transaction: %s", tdb_errorstr(ecode));
+	printf(" %zu ns (%zu bytes)\n",
+	       normalize(&start, &stop, num), file_size());
+
+	if (tdb_check(tdb, NULL, NULL))
+		errx(1, "tdb_check failed!");
+	if (summary)
+		print_summary(tdb);
+	if (do_stats)
+		dump_and_clear_stats(&tdb, flags, &log);
+	if (++stage == stopat)
+		exit(0);
+
+	return 0;
+}
diff --git a/lib/tdb2/tools/tdb2dump.c
b/lib/tdb2/tools/tdb2dump.c
new file mode 100644
index 0000000000..abe1d9b871
--- /dev/null
+++ b/lib/tdb2/tools/tdb2dump.c
@@ -0,0 +1,115 @@
+/*
+   simple tdb2 dump util
+   Copyright (C) Andrew Tridgell 2001
+   Copyright (C) Rusty Russell 2011
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#include "tdb2.h"
+#include <ctype.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* Writes d to stdout: printable bytes other than '"' and '\' verbatim,
+ * everything else as a backslash followed by two upper-case hex digits. */
+static void print_data(TDB_DATA d)
+{
+	unsigned char *p = (unsigned char *)d.dptr;
+	int len = d.dsize;
+	while (len--) {
+		if (isprint(*p) && !strchr("\"\\", *p)) {
+			fputc(*p, stdout);
+		} else {
+			printf("\\%02X", *p);
+		}
+		p++;
+	}
+}
+
+/* Traverse callback: prints one record as a { key(N) = "..." data(N) =
+ * "..." } group. */
+static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, void *state)
+{
+	printf("{\n");
+	printf("key(%d) = \"", (int)key.dsize);
+	print_data(key);
+	printf("\"\n");
+	printf("data(%d) = \"", (int)dbuf.dsize);
+	print_data(dbuf);
+	printf("\"\n");
+	printf("}\n");
+	return 0;
+}
+
+/* Dumps every record of fname, or only the value stored under keyname
+ * when keyname is not NULL.  Returns 0 on success, 1 if the database
+ * cannot be opened or the key cannot be fetched. */
+static int dump_tdb(const char *fname, const char *keyname)
+{
+	struct tdb_context *tdb;
+	TDB_DATA key, value;
+	int ret = 0;
+
+	tdb = tdb_open(fname, 0, O_RDONLY, 0, NULL);
+	if (!tdb) {
+		printf("Failed to open %s\n", fname);
+		return 1;
+	}
+
+	if (!keyname) {
+		tdb_traverse(tdb, traverse_fn, NULL);
+	} else {
+		key = tdb_mkdata(keyname, strlen(keyname));
+		if (tdb_fetch(tdb, key, &value) != 0) {
+			ret = 1;
+		} else {
+			print_data(value);
+			free(value.dptr);
+		}
+	}
+
+	/* Release the database on every path, not just at process exit. */
+	tdb_close(tdb);
+	return ret;
+}
+
+static void usage( void)
+{
+	printf( "Usage: tdb2dump [options] <filename>\n\n");
+	printf( " -h this help message\n");
+	printf( " -k keyname dumps value of keyname\n");
+}
+
+ int main(int argc, char *argv[])
+{
+	char *fname, *keyname=NULL;
+	int c;
+
+	if (argc < 2) {
+		printf("Usage: tdb2dump <fname>\n");
+		exit(1);
+	}
+
+	while ((c = getopt( argc, argv, "hk:")) != -1) {
+		switch (c) {
+		case 'h':
+			usage();
+			exit( 0);
+		case 'k':
+			keyname = optarg;
+			break;
+		default:
+			usage();
+			exit( 1);
+		}
+	}
+
+	/* Options alone are not enough: a filename must follow them,
+	 * otherwise argv[optind] is NULL. */
+	if (optind >= argc) {
+		usage();
+		exit( 1);
+	}
+	fname = argv[optind];
+
+	return dump_tdb(fname, keyname);
+}
diff --git a/lib/tdb2/tools/tdb2restore.c b/lib/tdb2/tools/tdb2restore.c
new file mode 100644
index 0000000000..658215a16c
--- /dev/null
+++ b/lib/tdb2/tools/tdb2restore.c
@@ -0,0 +1,227 @@
+/*
+   tdb2restore -- construct a tdb from tdbdump output.
+   Copyright (C) Volker Lendecke 2010
+   Copyright (C) Simon McVittie 2005
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "tdb2.h"
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define debug_fprintf(file, fmt, ...) do {/*nothing*/} while (0)
+
+/* Skips input up to and including the next '(', then parses the decimal
+ * length from a "N) = " prefix that ends at the opening '"'.
+ * Returns the length, or -1 on EOF, an over-long or malformed prefix,
+ * or a negative length. */
+static int read_linehead(FILE *f)
+{
+	size_t i;
+	int c;
+	int num_bytes;
+	char prefix[128];
+
+	while (1) {
+		c = getc(f);
+		if (c == EOF) {
+			return -1;
+		}
+		if (c == '(') {
+			break;
+		}
+	}
+	for (i=0; i<sizeof(prefix); i++) {
+		c = getc(f);
+		if (c == EOF) {
+			return -1;
+		}
+		prefix[i] = c;
+		if (c == '"') {
+			break;
+		}
+	}
+	/* No opening quote within the buffer. */
+	if (i == sizeof(prefix)) {
+		return -1;
+	}
+	/* Overwrite the quote to terminate the string. */
+	prefix[i] = '\0';
+
+	if (sscanf(prefix, "%d) = ", &num_bytes) != 1) {
+		return -1;
+	}
+	/* A negative length is never valid and would turn into a huge
+	 * size_t in read_data(). */
+	if (num_bytes < 0) {
+		return -1;
+	}
+	return num_bytes;
+}
+
+/* Reads one hex digit from f and returns its value (0-15), or -1 on EOF
+ * or a character that is not a hex digit. */
+static int read_hex(FILE *f) {
+	int c;
+	/* Read from the stream being parsed, not unconditionally from stdin. */
+	c = getc(f);
+	if (c == EOF) {
+		fprintf(stderr, "Unexpected EOF in data\n");
+		return -1;
+	} else if (c == '"') {
+		fprintf(stderr, "Unexpected \\\" sequence\n");
+		return -1;
+	} else if ('0' <= c && c <= '9') {
+		return c - '0';
+	} else if ('A' <= c && c <= 'F') {
+		return c - 'A' + 10;
+	} else if ('a' <= c && c <= 'f') {
+		return c - 'a' + 10;
+	} else {
+		fprintf(stderr, "Invalid hex: %c\n", c);
+		return -1;
+	}
+}
+
+/* Allocates d->dptr with size bytes and fills it from f, decoding each
+ * \HH escape into one byte.  Stops early if a '"' is read.
+ * Returns 0 on success, -1 on allocation failure, EOF or a bad escape;
+ * d->dptr may be non-NULL on failure and is left for the caller to free. */
+static int read_data(FILE *f, struct tdb_data *d, size_t size) {
+	int c, low, high;
+	size_t i;
+
+	d->dptr = (unsigned char *)malloc(size);
+	if (d->dptr == NULL) {
+		return -1;
+	}
+	d->dsize = size;
+
+	for (i=0; i<size; i++) {
+		c = getc(f);
+		if (c == EOF) {
+			fprintf(stderr, "Unexpected EOF in data\n");
+			/* Callers only test for -1; returning 1 here let a
+			 * truncated record be treated as success. */
+			return -1;
+		} else if (c == '"') {
+			return 0;
+		} else if (c == '\\') {
+			high = read_hex(f);
+			if (high < 0) {
+				return -1;
+			}
+			high = high << 4;
+			assert(high == (high & 0xf0));
+			low = read_hex(f);
+			if (low < 0) {
+				return -1;
+			}
+			assert(low == (low & 0x0f));
+			d->dptr[i] = (low|high);
+		} else {
+			d->dptr[i] = c;
+		}
+	}
+	return 0;
+}
+
+/* Reads one line from f and requires it to equal s exactly.
+ * Returns 0 on a match, -1 otherwise; when nothing could be read and
+ * eof is not NULL, *eof is set to 1. */
+static int swallow(FILE *f, const char *s, int *eof)
+{
+	char line[128];
+
+	if (fgets(line, sizeof(line), f) == NULL) {
+		if (eof != NULL) {
+			*eof = 1;
+		}
+		return -1;
+	}
+	if (strcmp(line, s) != 0) {
+		return -1;
+	}
+	return 0;
+}
+
+/* Parses one { key(N) = "..." data(N) = "..." } group from f and inserts
+ * it into tdb.  Returns true on success.  On failure *eof is set to 1
+ * only if the input ended before the opening "{" line. */
+static bool read_rec(FILE *f, struct tdb_context *tdb, int *eof)
+{
+	int length;
+	struct tdb_data key, data;
+	bool ret = false;
+	enum TDB_ERROR e;
+
+	/* NULL so that the common exit path can free both unconditionally. */
+	key.dptr = NULL;
+	data.dptr = NULL;
+
+	if (swallow(f, "{\n", eof) == -1) {
+		goto fail;
+	}
+	length = read_linehead(f);
+	if (length == -1) {
+		goto fail;
+	}
+	if (read_data(f, &key, length) == -1) {
+		goto fail;
+	}
+	if (swallow(f, "\"\n", NULL) == -1) {
+		goto fail;
+	}
+	length = read_linehead(f);
+	if (length == -1) {
+		goto fail;
+	}
+	if (read_data(f, &data, length) == -1) {
+		goto fail;
+	}
+	if ((swallow(f, "\"\n", NULL) == -1)
+	    || (swallow(f, "}\n", NULL) == -1)) {
+		goto fail;
+	}
+	e = tdb_store(tdb, key, data, TDB_INSERT);
+	if (e != TDB_SUCCESS) {
+		fprintf(stderr, "TDB error: %s\n", tdb_errorstr(e));
+		goto fail;
+	}
+
+	ret = true;
+fail:
+	free(key.dptr);
+	free(data.dptr);
+	return ret;
+}
+
+/* Creates fname (which must not exist yet) and fills it with the records
+ * read from stdin.  Returns 0 on success, 1 on any failure. */
+static int restore_tdb(const char *fname)
+{
+	struct tdb_context *tdb;
+
+	tdb = tdb_open(fname, 0, O_RDWR|O_CREAT|O_EXCL, 0666, NULL);
+	if (!tdb) {
+		perror("tdb_open");
+		fprintf(stderr, "Failed to open %s\n", fname);
+		return 1;
+	}
+
+	while (1) {
+		int eof = 0;
+		if (!read_rec(stdin, tdb, &eof)) {
+			if (eof) {
+				break;
+			}
+			/* Malformed input: still release the database. */
+			tdb_close(tdb);
+			return 1;
+		}
+	}
+	if (tdb_close(tdb)) {
+		fprintf(stderr, "Error closing tdb\n");
+		return 1;
+	}
+	fprintf(stderr, "EOF\n");
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	char *fname;
+
+	if (argc < 2) {
+		printf("Usage: %s dbname < tdbdump_output\n", argv[0]);
+		exit(1);
+	}
+
+	fname = argv[1];
+
+	return restore_tdb(fname);
+}
diff --git a/lib/tdb2/tools/tdb2tool.c b/lib/tdb2/tools/tdb2tool.c
new file mode 100644
index 0000000000..cd301c80b7
--- /dev/null
+++ b/lib/tdb2/tools/tdb2tool.c
@@ -0,0 +1,798 @@
+/*
+   Unix SMB/CIFS implementation.
+ Samba database functions + Copyright (C) Andrew Tridgell 1999-2000 + Copyright (C) Paul `Rusty' Russell 2000 + Copyright (C) Jeremy Allison 2000 + Copyright (C) Andrew Esh 2001 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "tdb2.h" +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <stdarg.h> + +static int do_command(void); +const char *cmdname; +char *arg1, *arg2; +size_t arg1len, arg2len; +int bIterate = 0; +char *line; +TDB_DATA iterate_kbuf; +char cmdline[1024]; +static int disable_mmap; + +enum commands { + CMD_CREATE_TDB, + CMD_OPEN_TDB, + CMD_TRANSACTION_START, + CMD_TRANSACTION_COMMIT, + CMD_TRANSACTION_CANCEL, + CMD_ERASE, + CMD_DUMP, + CMD_INSERT, + CMD_MOVE, + CMD_STORE, + CMD_SHOW, + CMD_KEYS, + CMD_HEXKEYS, + CMD_DELETE, +#if 0 + CMD_LIST_HASH_FREE, + CMD_LIST_FREE, +#endif + CMD_INFO, + CMD_MMAP, + CMD_SPEED, + CMD_FIRST, + CMD_NEXT, + CMD_SYSTEM, + CMD_CHECK, + CMD_QUIT, + CMD_HELP +}; + +typedef struct { + const char *name; + enum commands cmd; +} COMMAND_TABLE; + +COMMAND_TABLE cmd_table[] = { + {"create", CMD_CREATE_TDB}, + {"open", CMD_OPEN_TDB}, +#if 0 + {"transaction_start", CMD_TRANSACTION_START}, + {"transaction_commit", CMD_TRANSACTION_COMMIT}, + {"transaction_cancel", CMD_TRANSACTION_CANCEL}, 
+#endif + {"erase", CMD_ERASE}, + {"dump", CMD_DUMP}, + {"insert", CMD_INSERT}, + {"move", CMD_MOVE}, + {"store", CMD_STORE}, + {"show", CMD_SHOW}, + {"keys", CMD_KEYS}, + {"hexkeys", CMD_HEXKEYS}, + {"delete", CMD_DELETE}, +#if 0 + {"list", CMD_LIST_HASH_FREE}, + {"free", CMD_LIST_FREE}, +#endif + {"info", CMD_INFO}, + {"speed", CMD_SPEED}, + {"mmap", CMD_MMAP}, + {"first", CMD_FIRST}, + {"1", CMD_FIRST}, + {"next", CMD_NEXT}, + {"n", CMD_NEXT}, + {"check", CMD_CHECK}, + {"quit", CMD_QUIT}, + {"q", CMD_QUIT}, + {"!", CMD_SYSTEM}, + {NULL, CMD_HELP} +}; + +struct timeval tp1,tp2; + +static void _start_timer(void) +{ + gettimeofday(&tp1,NULL); +} + +static double _end_timer(void) +{ + gettimeofday(&tp2,NULL); + return((tp2.tv_sec - tp1.tv_sec) + + (tp2.tv_usec - tp1.tv_usec)*1.0e-6); +} + +static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level, + const char *message, void *priv) +{ + fputs(message, stderr); +} + +/* a tdb tool for manipulating a tdb database */ + +static struct tdb_context *tdb; + +static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state); +static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state); +static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state); + +static void print_asc(const char *buf,int len) +{ + int i; + + /* We're probably printing ASCII strings so don't try to display + the trailing NULL character. 
*/ + + if (buf[len - 1] == 0) + len--; + + for (i=0;i<len;i++) + printf("%c",isprint(buf[i])?buf[i]:'.'); +} + +static void print_data(const char *buf,int len) +{ + int i=0; + if (len<=0) return; + printf("[%03X] ",i); + for (i=0;i<len;) { + printf("%02X ",(int)((unsigned char)buf[i])); + i++; + if (i%8 == 0) printf(" "); + if (i%16 == 0) { + print_asc(&buf[i-16],8); printf(" "); + print_asc(&buf[i-8],8); printf("\n"); + if (i<len) printf("[%03X] ",i); + } + } + if (i%16) { + int n; + + n = 16 - (i%16); + printf(" "); + if (n>8) printf(" "); + while (n--) printf(" "); + + n = i%16; + if (n > 8) n = 8; + print_asc(&buf[i-(i%16)],n); printf(" "); + n = (i%16) - n; + if (n>0) print_asc(&buf[i-n],n); + printf("\n"); + } +} + +static void help(void) +{ + printf("\n" +"tdbtool: \n" +" create dbname : create a database\n" +" open dbname : open an existing database\n" +" openjh dbname : open an existing database (jenkins hash)\n" +" transaction_start : start a transaction\n" +" transaction_commit : commit a transaction\n" +" transaction_cancel : cancel a transaction\n" +" erase : erase the database\n" +" dump : dump the database as strings\n" +" keys : dump the database keys as strings\n" +" hexkeys : dump the database keys as hex values\n" +" info : print summary info about the database\n" +" insert key data : insert a record\n" +" move key file : move a record to a destination tdb\n" +" store key data : store a record (replace)\n" +" show key : show a record by key\n" +" delete key : delete a record by key\n" +#if 0 +" list : print the database hash table and freelist\n" +" free : print the database freelist\n" +#endif +" check : check the integrity of an opened database\n" +" speed : perform speed tests on the database\n" +" ! 
command : execute system command\n" +" 1 | first : print the first record\n" +" n | next : print the next record\n" +" q | quit : terminate\n" +" \\n : repeat 'next' command\n" +"\n"); +} + +static void terror(enum TDB_ERROR err, const char *why) +{ + if (err != TDB_SUCCESS) + printf("%s:%s\n", tdb_errorstr(err), why); + else + printf("%s\n", why); +} + +static void create_tdb(const char *tdbname) +{ + union tdb_attribute log_attr; + log_attr.base.attr = TDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = tdb_log; + + if (tdb) tdb_close(tdb); + tdb = tdb_open(tdbname, (disable_mmap?TDB_NOMMAP:0), + O_RDWR | O_CREAT | O_TRUNC, 0600, &log_attr); + if (!tdb) { + printf("Could not create %s: %s\n", tdbname, strerror(errno)); + } +} + +static void open_tdb(const char *tdbname) +{ + union tdb_attribute log_attr; + log_attr.base.attr = TDB_ATTRIBUTE_LOG; + log_attr.base.next = NULL; + log_attr.log.fn = tdb_log; + + if (tdb) tdb_close(tdb); + tdb = tdb_open(tdbname, disable_mmap?TDB_NOMMAP:0, O_RDWR, 0600, + &log_attr); + if (!tdb) { + printf("Could not open %s: %s\n", tdbname, strerror(errno)); + } +} + +static void insert_tdb(char *keyname, size_t keylen, char* data, size_t datalen) +{ + TDB_DATA key, dbuf; + enum TDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(TDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + dbuf.dptr = (unsigned char *)data; + dbuf.dsize = datalen; + + ecode = tdb_store(tdb, key, dbuf, TDB_INSERT); + if (ecode) { + terror(ecode, "insert failed"); + } +} + +static void store_tdb(char *keyname, size_t keylen, char* data, size_t datalen) +{ + TDB_DATA key, dbuf; + enum TDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(TDB_SUCCESS, "need key"); + return; + } + + if ((data == NULL) || (datalen == 0)) { + terror(TDB_SUCCESS, "need data"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + dbuf.dptr = (unsigned char 
*)data; + dbuf.dsize = datalen; + + printf("Storing key:\n"); + print_rec(tdb, key, dbuf, NULL); + + ecode = tdb_store(tdb, key, dbuf, TDB_REPLACE); + if (ecode) { + terror(ecode, "store failed"); + } +} + +static void show_tdb(char *keyname, size_t keylen) +{ + TDB_DATA key, dbuf; + enum TDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(TDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = tdb_fetch(tdb, key, &dbuf); + if (ecode) { + terror(ecode, "fetch failed"); + return; + } + + print_rec(tdb, key, dbuf, NULL); + + free( dbuf.dptr ); +} + +static void delete_tdb(char *keyname, size_t keylen) +{ + TDB_DATA key; + enum TDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(TDB_SUCCESS, "need key"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = tdb_delete(tdb, key); + if (ecode) { + terror(ecode, "delete failed"); + } +} + +static void move_rec(char *keyname, size_t keylen, char* tdbname) +{ + TDB_DATA key, dbuf; + struct tdb_context *dst_tdb; + enum TDB_ERROR ecode; + + if ((keyname == NULL) || (keylen == 0)) { + terror(TDB_SUCCESS, "need key"); + return; + } + + if ( !tdbname ) { + terror(TDB_SUCCESS, "need destination tdb name"); + return; + } + + key.dptr = (unsigned char *)keyname; + key.dsize = keylen; + + ecode = tdb_fetch(tdb, key, &dbuf); + if (ecode) { + terror(ecode, "fetch failed"); + return; + } + + print_rec(tdb, key, dbuf, NULL); + + dst_tdb = tdb_open(tdbname, 0, O_RDWR, 0600, NULL); + if ( !dst_tdb ) { + terror(TDB_SUCCESS, "unable to open destination tdb"); + return; + } + + ecode = tdb_store( dst_tdb, key, dbuf, TDB_REPLACE); + if (ecode) + terror(ecode, "failed to move record"); + else + printf("record moved\n"); + + tdb_close( dst_tdb ); +} + +static int print_rec(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state) +{ + printf("\nkey %d bytes\n", (int)key.dsize); + print_asc((const 
char *)key.dptr, key.dsize); + printf("\ndata %d bytes\n", (int)dbuf.dsize); + print_data((const char *)dbuf.dptr, dbuf.dsize); + return 0; +} + +static int print_key(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state) +{ + printf("key %d bytes: ", (int)key.dsize); + print_asc((const char *)key.dptr, key.dsize); + printf("\n"); + return 0; +} + +static int print_hexkey(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state) +{ + printf("key %d bytes\n", (int)key.dsize); + print_data((const char *)key.dptr, key.dsize); + printf("\n"); + return 0; +} + +static int total_bytes; + +static int traverse_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, void *state) +{ + total_bytes += dbuf.dsize; + return 0; +} + +static void info_tdb(void) +{ + enum TDB_ERROR ecode; + char *summary; + + ecode = tdb_summary(tdb, TDB_SUMMARY_HISTOGRAMS, &summary); + + if (ecode) { + terror(ecode, "Getting summary"); + } else { + printf("%s", summary); + free(summary); + } +} + +static void speed_tdb(const char *tlimit) +{ + unsigned timelimit = tlimit?atoi(tlimit):0; + double t; + int ops; + if (timelimit == 0) timelimit = 5; + + ops = 0; + printf("Testing store speed for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + TDB_DATA key, dbuf; + key = tdb_mkdata("store test", strlen("store test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + tdb_store(tdb, key, dbuf, TDB_REPLACE); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing fetch speed for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + TDB_DATA key, dbuf; + key = tdb_mkdata("store test", strlen("store test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + tdb_fetch(tdb, key, &dbuf); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing transaction speed 
for %u seconds\n", timelimit); + _start_timer(); + do { + long int r = random(); + TDB_DATA key, dbuf; + key = tdb_mkdata("transaction test", strlen("transaction test")); + dbuf.dptr = (unsigned char *)&r; + dbuf.dsize = sizeof(r); + tdb_transaction_start(tdb); + tdb_store(tdb, key, dbuf, TDB_REPLACE); + tdb_transaction_commit(tdb); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); + + ops = 0; + printf("Testing traverse speed for %u seconds\n", timelimit); + _start_timer(); + do { + tdb_traverse(tdb, traverse_fn, NULL); + t = _end_timer(); + ops++; + } while (t < timelimit); + printf("%10.3f ops/sec\n", ops/t); +} + +static void toggle_mmap(void) +{ + disable_mmap = !disable_mmap; + if (disable_mmap) { + printf("mmap is disabled\n"); + } else { + printf("mmap is enabled\n"); + } +} + +static char *tdb_getline(const char *prompt) +{ + static char thisline[1024]; + char *p; + fputs(prompt, stdout); + thisline[0] = 0; + p = fgets(thisline, sizeof(thisline)-1, stdin); + if (p) p = strchr(p, '\n'); + if (p) *p = 0; + return p?thisline:NULL; +} + +static int do_delete_fn(struct tdb_context *the_tdb, TDB_DATA key, TDB_DATA dbuf, + void *state) +{ + return tdb_delete(the_tdb, key); +} + +static void first_record(struct tdb_context *the_tdb, TDB_DATA *pkey) +{ + TDB_DATA dbuf; + enum TDB_ERROR ecode; + ecode = tdb_firstkey(the_tdb, pkey); + if (!ecode) + ecode = tdb_fetch(the_tdb, *pkey, &dbuf); + if (ecode) terror(ecode, "fetch failed"); + else { + print_rec(the_tdb, *pkey, dbuf, NULL); + } +} + +static void next_record(struct tdb_context *the_tdb, TDB_DATA *pkey) +{ + TDB_DATA dbuf; + enum TDB_ERROR ecode; + ecode = tdb_nextkey(the_tdb, pkey); + + if (!ecode) + ecode = tdb_fetch(the_tdb, *pkey, &dbuf); + if (ecode) + terror(ecode, "fetch failed"); + else + print_rec(the_tdb, *pkey, dbuf, NULL); +} + +static void check_db(struct tdb_context *the_tdb) +{ + if (!the_tdb) { + printf("Error: No database opened!\n"); + } else { + if 
(tdb_check(the_tdb, NULL, NULL) != 0) + printf("Integrity check for the opened database failed.\n"); + else + printf("Database integrity is OK.\n"); + } +} + +static int do_command(void) +{ + COMMAND_TABLE *ctp = cmd_table; + enum commands mycmd = CMD_HELP; + int cmd_len; + + if (cmdname && strlen(cmdname) == 0) { + mycmd = CMD_NEXT; + } else { + while (ctp->name) { + cmd_len = strlen(ctp->name); + if (strncmp(ctp->name,cmdname,cmd_len) == 0) { + mycmd = ctp->cmd; + break; + } + ctp++; + } + } + + switch (mycmd) { + case CMD_CREATE_TDB: + bIterate = 0; + create_tdb(arg1); + return 0; + case CMD_OPEN_TDB: + bIterate = 0; + open_tdb(arg1); + return 0; + case CMD_SYSTEM: + /* Shell command */ + if (system(arg1) == -1) { + terror(TDB_SUCCESS, "system() call failed\n"); + } + return 0; + case CMD_QUIT: + return 1; + default: + /* all the rest require a open database */ + if (!tdb) { + bIterate = 0; + terror(TDB_SUCCESS, "database not open"); + help(); + return 0; + } + switch (mycmd) { + case CMD_TRANSACTION_START: + bIterate = 0; + tdb_transaction_start(tdb); + return 0; + case CMD_TRANSACTION_COMMIT: + bIterate = 0; + tdb_transaction_commit(tdb); + return 0; + case CMD_TRANSACTION_CANCEL: + bIterate = 0; + tdb_transaction_cancel(tdb); + return 0; + case CMD_ERASE: + bIterate = 0; + tdb_traverse(tdb, do_delete_fn, NULL); + return 0; + case CMD_DUMP: + bIterate = 0; + tdb_traverse(tdb, print_rec, NULL); + return 0; + case CMD_INSERT: + bIterate = 0; + insert_tdb(arg1, arg1len,arg2,arg2len); + return 0; + case CMD_MOVE: + bIterate = 0; + move_rec(arg1,arg1len,arg2); + return 0; + case CMD_STORE: + bIterate = 0; + store_tdb(arg1,arg1len,arg2,arg2len); + return 0; + case CMD_SHOW: + bIterate = 0; + show_tdb(arg1, arg1len); + return 0; + case CMD_KEYS: + tdb_traverse(tdb, print_key, NULL); + return 0; + case CMD_HEXKEYS: + tdb_traverse(tdb, print_hexkey, NULL); + return 0; + case CMD_DELETE: + bIterate = 0; + delete_tdb(arg1,arg1len); + return 0; +#if 0 + case 
CMD_LIST_HASH_FREE: + tdb_dump_all(tdb); + return 0; + case CMD_LIST_FREE: + tdb_printfreelist(tdb); + return 0; +#endif + case CMD_INFO: + info_tdb(); + return 0; + case CMD_SPEED: + speed_tdb(arg1); + return 0; + case CMD_MMAP: + toggle_mmap(); + return 0; + case CMD_FIRST: + bIterate = 1; + first_record(tdb, &iterate_kbuf); + return 0; + case CMD_NEXT: + if (bIterate) + next_record(tdb, &iterate_kbuf); + return 0; + case CMD_CHECK: + check_db(tdb); + return 0; + case CMD_HELP: + help(); + return 0; + case CMD_CREATE_TDB: + case CMD_OPEN_TDB: + case CMD_SYSTEM: + case CMD_QUIT: + /* + * unhandled commands. cases included here to avoid compiler + * warnings. + */ + return 0; + } + } + + return 0; +} + +static char *convert_string(char *instring, size_t *sizep) +{ + size_t length = 0; + char *outp, *inp; + char temp[3]; + + outp = inp = instring; + + while (*inp) { + if (*inp == '\\') { + inp++; + if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { + temp[0] = *inp++; + temp[1] = '\0'; + if (*inp && strchr("0123456789abcdefABCDEF",(int)*inp)) { + temp[1] = *inp++; + temp[2] = '\0'; + } + *outp++ = (char)strtol((const char *)temp,NULL,16); + } else { + *outp++ = *inp++; + } + } else { + *outp++ = *inp++; + } + length++; + } + *sizep = length; + return instring; +} + +int main(int argc, char *argv[]) +{ + cmdname = ""; + arg1 = NULL; + arg1len = 0; + arg2 = NULL; + arg2len = 0; + + if (argv[1]) { + cmdname = "open"; + arg1 = argv[1]; + do_command(); + cmdname = ""; + arg1 = NULL; + } + + switch (argc) { + case 1: + case 2: + /* Interactive mode */ + while ((cmdname = tdb_getline("tdb> "))) { + arg2 = arg1 = NULL; + if ((arg1 = strchr((const char *)cmdname,' ')) != NULL) { + arg1++; + arg2 = arg1; + while (*arg2) { + if (*arg2 == ' ') { + *arg2++ = '\0'; + break; + } + if ((*arg2++ == '\\') && (*arg2 == ' ')) { + arg2++; + } + } + } + if (arg1) arg1 = convert_string(arg1,&arg1len); + if (arg2) arg2 = convert_string(arg2,&arg2len); + if (do_command()) break; + } 
+ break; + case 5: + arg2 = convert_string(argv[4],&arg2len); + case 4: + arg1 = convert_string(argv[3],&arg1len); + case 3: + cmdname = argv[2]; + default: + do_command(); + break; + } + + if (tdb) tdb_close(tdb); + + return 0; +} diff --git a/lib/tdb2/tools/tdb2torture.c b/lib/tdb2/tools/tdb2torture.c new file mode 100644 index 0000000000..f6a7a5064a --- /dev/null +++ b/lib/tdb2/tools/tdb2torture.c @@ -0,0 +1,494 @@ +/* this tests tdb by doing lots of ops from several simultaneous + writers - that stresses the locking code. +*/ + +#include "tdb2.h" +#include <stdlib.h> +#include <err.h> +#include <getopt.h> +#include <stdarg.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> +#include <time.h> +#include <sys/wait.h> + +//#define REOPEN_PROB 30 +#define DELETE_PROB 8 +#define STORE_PROB 4 +#define APPEND_PROB 6 +#define TRANSACTION_PROB 10 +#define TRANSACTION_PREPARE_PROB 2 +#define LOCKSTORE_PROB 5 +#define TRAVERSE_PROB 20 +#define TRAVERSE_MOD_PROB 100 +#define TRAVERSE_ABORT_PROB 500 +#define CULL_PROB 100 +#define KEYLEN 3 +#define DATALEN 100 + +static struct tdb_context *db; +static int in_transaction; +static int in_traverse; +static int error_count; +#if TRANSACTION_PROB +static int always_transaction = 0; +#endif +static int loopnum; +static int count_pipe; +static union tdb_attribute log_attr; +static union tdb_attribute seed_attr; + +static void tdb_log(struct tdb_context *tdb, enum tdb_log_level level, + const char *message, void *data) +{ + fputs(message, stdout); + fflush(stdout); +#if 0 + { + char str[200]; + signal(SIGUSR1, SIG_IGN); + sprintf(str,"xterm -e gdb /proc/%d/exe %d", getpid(), getpid()); + system(str); + } +#endif +} + +#include "../private.h" + +static void segv_handler(int sig, siginfo_t *info, void *p) +{ + char string[100]; + + sprintf(string, "%u: death at %p (map_ptr %p, map_size %zu)\n", + getpid(), info->si_addr, db->file->map_ptr, + 
(size_t)db->file->map_size); + if (write(2, string, strlen(string)) > 0) + sleep(60); + _exit(11); +} + +static void fatal(struct tdb_context *tdb, const char *why) +{ + fprintf(stderr, "%u:%s:%s\n", getpid(), why, + tdb ? tdb_errorstr(tdb_error(tdb)) : "(no tdb)"); + error_count++; +} + +static char *randbuf(int len) +{ + char *buf; + int i; + buf = (char *)malloc(len+1); + + for (i=0;i<len;i++) { + buf[i] = 'a' + (rand() % 26); + } + buf[i] = 0; + return buf; +} + +static void addrec_db(void); +static int modify_traverse(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, + void *state) +{ +#if CULL_PROB + if (random() % CULL_PROB == 0) { + tdb_delete(tdb, key); + } +#endif + +#if TRAVERSE_MOD_PROB + if (random() % TRAVERSE_MOD_PROB == 0) { + addrec_db(); + } +#endif + +#if TRAVERSE_ABORT_PROB + if (random() % TRAVERSE_ABORT_PROB == 0) + return 1; +#endif + + return 0; +} + +static void addrec_db(void) +{ + int klen, dlen; + char *k, *d; + TDB_DATA key, data; + + klen = 1 + (rand() % KEYLEN); + dlen = 1 + (rand() % DATALEN); + + k = randbuf(klen); + d = randbuf(dlen); + + key.dptr = (unsigned char *)k; + key.dsize = klen+1; + + data.dptr = (unsigned char *)d; + data.dsize = dlen+1; + +#if REOPEN_PROB + if (in_traverse == 0 && in_transaction == 0 && random() % REOPEN_PROB == 0) { + tdb_reopen_all(0); + goto next; + } +#endif + +#if TRANSACTION_PROB + if (in_traverse == 0 && in_transaction == 0 && (always_transaction || random() % TRANSACTION_PROB == 0)) { + if (tdb_transaction_start(db) != 0) { + fatal(db, "tdb_transaction_start failed"); + } + in_transaction++; + goto next; + } + if (in_traverse == 0 && in_transaction && random() % TRANSACTION_PROB == 0) { + if (random() % TRANSACTION_PREPARE_PROB == 0) { + if (tdb_transaction_prepare_commit(db) != 0) { + fatal(db, "tdb_transaction_prepare_commit failed"); + } + } + if (tdb_transaction_commit(db) != 0) { + fatal(db, "tdb_transaction_commit failed"); + } + in_transaction--; + goto next; + } + + if (in_traverse 
== 0 && in_transaction && random() % TRANSACTION_PROB == 0) { + tdb_transaction_cancel(db); + in_transaction--; + goto next; + } +#endif + +#if DELETE_PROB + if (random() % DELETE_PROB == 0) { + tdb_delete(db, key); + goto next; + } +#endif + +#if STORE_PROB + if (random() % STORE_PROB == 0) { + if (tdb_store(db, key, data, TDB_REPLACE) != 0) { + fatal(db, "tdb_store failed"); + } + goto next; + } +#endif + +#if APPEND_PROB + if (random() % APPEND_PROB == 0) { + if (tdb_append(db, key, data) != 0) { + fatal(db, "tdb_append failed"); + } + goto next; + } +#endif + +#if LOCKSTORE_PROB + if (random() % LOCKSTORE_PROB == 0) { + tdb_chainlock(db, key); + if (tdb_fetch(db, key, &data) != TDB_SUCCESS) { + data.dsize = 0; + data.dptr = NULL; + } + if (tdb_store(db, key, data, TDB_REPLACE) != 0) { + fatal(db, "tdb_store failed"); + } + if (data.dptr) free(data.dptr); + tdb_chainunlock(db, key); + goto next; + } +#endif + +#if TRAVERSE_PROB + /* FIXME: recursive traverses break transactions? */ + if (in_traverse == 0 && random() % TRAVERSE_PROB == 0) { + in_traverse++; + tdb_traverse(db, modify_traverse, NULL); + in_traverse--; + goto next; + } +#endif + + if (tdb_fetch(db, key, &data) == TDB_SUCCESS) + free(data.dptr); + +next: + free(k); + free(d); +} + +static int traverse_fn(struct tdb_context *tdb, TDB_DATA key, TDB_DATA dbuf, + void *state) +{ + tdb_delete(tdb, key); + return 0; +} + +static void usage(void) +{ + printf("Usage: tdbtorture" +#if TRANSACTION_PROB + " [-t]" +#endif + " [-k] [-n NUM_PROCS] [-l NUM_LOOPS] [-s SEED] [-S]\n"); + exit(0); +} + +static void send_count_and_suicide(int sig) +{ + /* This ensures our successor can continue where we left off. */ + if (write(count_pipe, &loopnum, sizeof(loopnum)) != sizeof(loopnum)) + exit(2); + /* This gives a unique signature. 
*/ + kill(getpid(), SIGUSR2); +} + +static int run_child(int i, int seed, unsigned num_loops, unsigned start, + int tdb_flags) +{ + struct sigaction act = { .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO }; + sigaction(11, &act, NULL); + + db = tdb_open("torture.tdb", tdb_flags, O_RDWR | O_CREAT, 0600, + &log_attr); + if (!db) { + fatal(NULL, "db open failed"); + } + +#if 0 + if (i == 0) { + printf("pid %i\n", getpid()); + sleep(9); + } else + sleep(10); +#endif + + srand(seed + i); + srandom(seed + i); + + /* Set global, then we're ready to handle being killed. */ + loopnum = start; + signal(SIGUSR1, send_count_and_suicide); + + for (;loopnum<num_loops && error_count == 0;loopnum++) { + addrec_db(); + } + + if (error_count == 0) { + tdb_traverse(db, NULL, NULL); +#if TRANSACTION_PROB + if (always_transaction) { + while (in_transaction) { + tdb_transaction_cancel(db); + in_transaction--; + } + if (tdb_transaction_start(db) != 0) + fatal(db, "tdb_transaction_start failed"); + } +#endif + tdb_traverse(db, traverse_fn, NULL); + tdb_traverse(db, traverse_fn, NULL); + +#if TRANSACTION_PROB + if (always_transaction) { + if (tdb_transaction_commit(db) != 0) + fatal(db, "tdb_transaction_commit failed"); + } +#endif + } + + tdb_close(db); + + return (error_count < 100 ? 
error_count : 100); +} + +int main(int argc, char * const *argv) +{ + int i, seed = -1; + int num_loops = 5000; + int num_procs = 3; + int c, pfds[2]; + extern char *optarg; + pid_t *pids; + int kill_random = 0; + int *done; + int tdb_flags = TDB_DEFAULT; + + log_attr.base.attr = TDB_ATTRIBUTE_LOG; + log_attr.base.next = &seed_attr; + log_attr.log.fn = tdb_log; + seed_attr.base.attr = TDB_ATTRIBUTE_SEED; + + while ((c = getopt(argc, argv, "n:l:s:thkS")) != -1) { + switch (c) { + case 'n': + num_procs = strtol(optarg, NULL, 0); + break; + case 'l': + num_loops = strtol(optarg, NULL, 0); + break; + case 's': + seed = strtol(optarg, NULL, 0); + break; + case 'S': + tdb_flags = TDB_NOSYNC; + break; + case 't': +#if TRANSACTION_PROB + always_transaction = 1; +#else + fprintf(stderr, "Transactions not supported\n"); + usage(); +#endif + break; + case 'k': + kill_random = 1; + break; + default: + usage(); + } + } + + unlink("torture.tdb"); + + if (seed == -1) { + seed = (getpid() + time(NULL)) & 0x7FFFFFFF; + } + seed_attr.seed.seed = (((uint64_t)seed) << 32) | seed; + + if (num_procs == 1 && !kill_random) { + /* Don't fork for this case, makes debugging easier. */ + error_count = run_child(0, seed, num_loops, 0, tdb_flags); + goto done; + } + + pids = (pid_t *)calloc(sizeof(pid_t), num_procs); + done = (int *)calloc(sizeof(int), num_procs); + + if (pipe(pfds) != 0) { + perror("Creating pipe"); + exit(1); + } + count_pipe = pfds[1]; + + for (i=0;i<num_procs;i++) { + if ((pids[i]=fork()) == 0) { + close(pfds[0]); + if (i == 0) { + printf("testing with %d processes, %d loops, seed=%d%s\n", + num_procs, num_loops, seed, +#if TRANSACTION_PROB + always_transaction ? 
" (all within transactions)" : "" +#else + "" +#endif + ); + } + exit(run_child(i, seed, num_loops, 0, tdb_flags)); + } + } + + while (num_procs) { + int status, j; + pid_t pid; + + if (error_count != 0) { + /* try and stop the test on any failure */ + for (j=0;j<num_procs;j++) { + if (pids[j] != 0) { + kill(pids[j], SIGTERM); + } + } + } + + pid = waitpid(-1, &status, kill_random ? WNOHANG : 0); + if (pid == 0) { + struct timespec ts; + + /* Sleep for 1/10 second. */ + ts.tv_sec = 0; + ts.tv_nsec = 100000000; + nanosleep(&ts, NULL); + + /* Kill someone. */ + kill(pids[random() % num_procs], SIGUSR1); + continue; + } + + if (pid == -1) { + perror("failed to wait for child\n"); + exit(1); + } + + for (j=0;j<num_procs;j++) { + if (pids[j] == pid) break; + } + if (j == num_procs) { + printf("unknown child %d exited!?\n", (int)pid); + exit(1); + } + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) == SIGUSR2 + || WTERMSIG(status) == SIGUSR1) { + /* SIGUSR2 means they wrote to pipe. */ + if (WTERMSIG(status) == SIGUSR2) { + if (read(pfds[0], &done[j], + sizeof(done[j])) + != sizeof(done[j])) + err(1, + "Short read from child?"); + } + pids[j] = fork(); + if (pids[j] == 0) + exit(run_child(j, seed, num_loops, + done[j], tdb_flags)); + printf("Restarting child %i for %u-%u\n", + j, done[j], num_loops); + continue; + } + printf("child %d exited with signal %d\n", + (int)pid, WTERMSIG(status)); + error_count++; + } else { + if (WEXITSTATUS(status) != 0) { + printf("child %d exited with status %d\n", + (int)pid, WEXITSTATUS(status)); + error_count++; + } + } + memmove(&pids[j], &pids[j+1], + (num_procs - j - 1)*sizeof(pids[0])); + num_procs--; + } + + free(pids); + +done: + if (error_count == 0) { + db = tdb_open("torture.tdb", TDB_DEFAULT, O_RDWR | O_CREAT, + 0600, &log_attr); + if (!db) { + fatal(db, "db open failed"); + exit(1); + } + if (tdb_check(db, NULL, NULL) != 0) { + fatal(db, "db check failed"); + exit(1); + } + tdb_close(db); + printf("OK\n"); + } + + return 
error_count; +} diff --git a/lib/tdb2/transaction.c b/lib/tdb2/transaction.c new file mode 100644 index 0000000000..b13223bc2e --- /dev/null +++ b/lib/tdb2/transaction.c @@ -0,0 +1,1308 @@ + /* + Unix SMB/CIFS implementation. + + trivial database library + + Copyright (C) Andrew Tridgell 2005 + Copyright (C) Rusty Russell 2010 + + ** NOTE! The following LGPL license applies to the tdb + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ + +#include "private.h" +#define SAFE_FREE(x) do { if ((x) != NULL) {free(x); (x)=NULL;} } while(0) + +/* + transaction design: + + - only allow a single transaction at a time per database. This makes + using the transaction API simpler, as otherwise the caller would + have to cope with temporary failures in transactions that conflict + with other current transactions + + - keep the transaction recovery information in the same file as the + database, using a special 'transaction recovery' record pointed at + by the header. This removes the need for extra journal files as + used by some other databases + + - dynamically allocated the transaction recover record, re-using it + for subsequent transactions. 
If a larger record is needed then + tdb_free() the old record to place it on the normal tdb freelist + before allocating the new record + + - during transactions, keep a linked list of all writes that have + been performed by intercepting all tdb_write() calls. The hooked + transaction versions of tdb_read() and tdb_write() check this + linked list and try to use the elements of the list in preference + to the real database. + + - don't allow any locks to be held when a transaction starts, + otherwise we can end up with deadlock (plus lack of lock nesting + in POSIX locks would mean the lock is lost) + + - if the caller gains a lock during the transaction but doesn't + release it then fail the commit + + - allow for nested calls to tdb_transaction_start(), re-using the + existing transaction record. If the inner transaction is canceled + then a subsequent commit will fail + + - keep a mirrored copy of the tdb hash chain heads to allow for the + fast hash heads scan on traverse, updating the mirrored copy in + the transaction version of tdb_write + + - allow callers to mix transaction and non-transaction use of tdb, + although once a transaction is started then an exclusive lock is + gained until the transaction is committed or canceled + + - the commit strategy involves first saving away all modified data + into a linearised buffer in the transaction recovery area, then + marking the transaction recovery area with a magic value to + indicate a valid recovery record. In total 4 fsync/msync calls are + needed per commit to prevent race conditions. It might be possible + to reduce this to 3 or even 2 with some more work. + + - check for a valid recovery record on open of the tdb, while the + open lock is held. Automatically recover from the transaction + recovery area if needed, then continue with the open as + usual. This allows for smooth crash recovery with no administrator + intervention.
+ + - if TDB_NOSYNC is passed to flags in tdb_open then transactions are + still available, but no transaction recovery area is used and no + fsync/msync calls are made. +*/ + +/* + hold the context of any current transaction +*/ +struct tdb_transaction { + /* the original io methods - used to do IOs to the real db */ + const struct tdb_methods *io_methods; + + /* the list of transaction blocks. When a block is first + written to, it gets created in this list */ + uint8_t **blocks; + size_t num_blocks; + size_t last_block_size; /* number of valid bytes in the last block */ + + /* non-zero when an internal transaction error has + occurred. All write operations will then fail until the + transaction is ended */ + int transaction_error; + + /* when inside a transaction we need to keep track of any + nested tdb_transaction_start() calls, as these are allowed, + but don't create a new transaction */ + unsigned int nesting; + + /* set when a prepare has already occurred */ + bool prepared; + tdb_off_t magic_offset; + + /* old file size before transaction */ + tdb_len_t old_map_size; +}; + +/* This doesn't really need to be pagesize, but we use it for similar reasons. */ +#define PAGESIZE 65536 + +/* + read while in a transaction. 
We need to check first if the data is in our list + of transaction elements, then if not do a real read +*/ +static enum TDB_ERROR transaction_read(struct tdb_context *tdb, tdb_off_t off, + void *buf, tdb_len_t len) +{ + size_t blk; + enum TDB_ERROR ecode; + + /* break it down into block sized ops */ + while (len + (off % PAGESIZE) > PAGESIZE) { + tdb_len_t len2 = PAGESIZE - (off % PAGESIZE); + ecode = transaction_read(tdb, off, buf, len2); + if (ecode != TDB_SUCCESS) { + return ecode; + } + len -= len2; + off += len2; + buf = (void *)(len2 + (char *)buf); + } + + if (len == 0) { + return TDB_SUCCESS; + } + + blk = off / PAGESIZE; + + /* see if we have it in the block list */ + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + /* nope, do a real read */ + ecode = tdb->transaction->io_methods->tread(tdb, off, buf, len); + if (ecode != TDB_SUCCESS) { + goto fail; + } + return 0; + } + + /* it is in the block list. Now check for the last block */ + if (blk == tdb->transaction->num_blocks-1) { + if (len > tdb->transaction->last_block_size) { + ecode = TDB_ERR_IO; + goto fail; + } + } + + /* now copy it out of this block */ + memcpy(buf, tdb->transaction->blocks[blk] + (off % PAGESIZE), len); + return TDB_SUCCESS; + +fail: + tdb->transaction->transaction_error = 1; + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "transaction_read: failed at off=%zu len=%zu", + (size_t)off, (size_t)len); +} + + +/* + write while in a transaction +*/ +static enum TDB_ERROR transaction_write(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + size_t blk; + enum TDB_ERROR ecode; + + /* Only a commit is allowed on a prepared transaction */ + if (tdb->transaction->prepared) { + ecode = tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR, + "transaction_write: transaction already" + " prepared, write not allowed"); + goto fail; + } + + /* break it up into block sized chunks */ + while (len + (off % PAGESIZE) > PAGESIZE) { + tdb_len_t 
len2 = PAGESIZE - (off % PAGESIZE); + ecode = transaction_write(tdb, off, buf, len2); + if (ecode != TDB_SUCCESS) { + return -1; + } + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); + } + } + + if (len == 0) { + return TDB_SUCCESS; + } + + blk = off / PAGESIZE; + off = off % PAGESIZE; + + if (tdb->transaction->num_blocks <= blk) { + uint8_t **new_blocks; + /* expand the blocks array */ + if (tdb->transaction->blocks == NULL) { + new_blocks = (uint8_t **)malloc( + (blk+1)*sizeof(uint8_t *)); + } else { + new_blocks = (uint8_t **)realloc( + tdb->transaction->blocks, + (blk+1)*sizeof(uint8_t *)); + } + if (new_blocks == NULL) { + ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "transaction_write:" + " failed to allocate"); + goto fail; + } + memset(&new_blocks[tdb->transaction->num_blocks], 0, + (1+(blk - tdb->transaction->num_blocks))*sizeof(uint8_t *)); + tdb->transaction->blocks = new_blocks; + tdb->transaction->num_blocks = blk+1; + tdb->transaction->last_block_size = 0; + } + + /* allocate and fill a block? 
*/ + if (tdb->transaction->blocks[blk] == NULL) { + tdb->transaction->blocks[blk] = (uint8_t *)calloc(PAGESIZE, 1); + if (tdb->transaction->blocks[blk] == NULL) { + ecode = tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "transaction_write:" + " failed to allocate"); + goto fail; + } + if (tdb->transaction->old_map_size > blk * PAGESIZE) { + tdb_len_t len2 = PAGESIZE; + if (len2 + (blk * PAGESIZE) > tdb->transaction->old_map_size) { + len2 = tdb->transaction->old_map_size - (blk * PAGESIZE); + } + ecode = tdb->transaction->io_methods->tread(tdb, + blk * PAGESIZE, + tdb->transaction->blocks[blk], + len2); + if (ecode != TDB_SUCCESS) { + ecode = tdb_logerr(tdb, ecode, + TDB_LOG_ERROR, + "transaction_write:" + " failed to" + " read old block: %s", + strerror(errno)); + SAFE_FREE(tdb->transaction->blocks[blk]); + goto fail; + } + if (blk == tdb->transaction->num_blocks-1) { + tdb->transaction->last_block_size = len2; + } + } + } + + /* overwrite part of an existing block */ + if (buf == NULL) { + memset(tdb->transaction->blocks[blk] + off, 0, len); + } else { + memcpy(tdb->transaction->blocks[blk] + off, buf, len); + } + if (blk == tdb->transaction->num_blocks-1) { + if (len + off > tdb->transaction->last_block_size) { + tdb->transaction->last_block_size = len + off; + } + } + + return TDB_SUCCESS; + +fail: + tdb->transaction->transaction_error = 1; + return ecode; +} + + +/* + write while in a transaction - this variant never expands the transaction blocks, it only + updates existing blocks. 
This means it cannot change the recovery size +*/ +static void transaction_write_existing(struct tdb_context *tdb, tdb_off_t off, + const void *buf, tdb_len_t len) +{ + size_t blk; + + /* break it up into block sized chunks */ + while (len + (off % PAGESIZE) > PAGESIZE) { + tdb_len_t len2 = PAGESIZE - (off % PAGESIZE); + transaction_write_existing(tdb, off, buf, len2); + len -= len2; + off += len2; + if (buf != NULL) { + buf = (const void *)(len2 + (const char *)buf); + } + } + + if (len == 0) { + return; + } + + blk = off / PAGESIZE; + off = off % PAGESIZE; + + if (tdb->transaction->num_blocks <= blk || + tdb->transaction->blocks[blk] == NULL) { + return; + } + + if (blk == tdb->transaction->num_blocks-1 && + off + len > tdb->transaction->last_block_size) { + if (off >= tdb->transaction->last_block_size) { + return; + } + len = tdb->transaction->last_block_size - off; + } + + /* overwrite part of an existing block */ + memcpy(tdb->transaction->blocks[blk] + off, buf, len); +} + + +/* + out of bounds check during a transaction +*/ +static enum TDB_ERROR transaction_oob(struct tdb_context *tdb, tdb_off_t len, + bool probe) +{ + if (len <= tdb->file->map_size) { + return TDB_SUCCESS; + } + if (!probe) { + tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_oob len %lld beyond transaction size %lld", + (long long)len, + (long long)tdb->file->map_size); + } + return TDB_ERR_IO; +} + +/* + transaction version of tdb_expand(). 
+*/ +static enum TDB_ERROR transaction_expand_file(struct tdb_context *tdb, + tdb_off_t addition) +{ + enum TDB_ERROR ecode; + + /* add a write to the transaction elements, so subsequent + reads see the zero data */ + ecode = transaction_write(tdb, tdb->file->map_size, NULL, addition); + if (ecode == TDB_SUCCESS) { + tdb->file->map_size += addition; + } + return ecode; +} + +static void *transaction_direct(struct tdb_context *tdb, tdb_off_t off, + size_t len, bool write_mode) +{ + size_t blk = off / PAGESIZE, end_blk; + + /* This is wrong for zero-length blocks, but will fail gracefully */ + end_blk = (off + len - 1) / PAGESIZE; + + /* Can only do direct if in single block and we've already copied. */ + if (write_mode) { + tdb->stats.transaction_write_direct++; + if (blk != end_blk + || blk >= tdb->transaction->num_blocks + || tdb->transaction->blocks[blk] == NULL) { + tdb->stats.transaction_write_direct_fail++; + return NULL; + } + return tdb->transaction->blocks[blk] + off % PAGESIZE; + } + + tdb->stats.transaction_read_direct++; + /* Single which we have copied? */ + if (blk == end_blk + && blk < tdb->transaction->num_blocks + && tdb->transaction->blocks[blk]) + return tdb->transaction->blocks[blk] + off % PAGESIZE; + + /* Otherwise must be all not copied. 
*/ + while (blk <= end_blk) { + if (blk >= tdb->transaction->num_blocks) + break; + if (tdb->transaction->blocks[blk]) { + tdb->stats.transaction_read_direct_fail++; + return NULL; + } + blk++; + } + return tdb->transaction->io_methods->direct(tdb, off, len, false); +} + +static const struct tdb_methods transaction_methods = { + transaction_read, + transaction_write, + transaction_oob, + transaction_expand_file, + transaction_direct, +}; + +/* + sync to disk +*/ +static enum TDB_ERROR transaction_sync(struct tdb_context *tdb, + tdb_off_t offset, tdb_len_t length) +{ + if (tdb->flags & TDB_NOSYNC) { + return TDB_SUCCESS; + } + + if (fsync(tdb->file->fd) != 0) { + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_transaction: fsync failed: %s", + strerror(errno)); + } +#ifdef MS_SYNC + if (tdb->file->map_ptr) { + tdb_off_t moffset = offset & ~(getpagesize()-1); + if (msync(moffset + (char *)tdb->file->map_ptr, + length + (offset - moffset), MS_SYNC) != 0) { + return tdb_logerr(tdb, TDB_ERR_IO, TDB_LOG_ERROR, + "tdb_transaction: msync failed: %s", + strerror(errno)); + } + } +#endif + return TDB_SUCCESS; +} + + +static void _tdb_transaction_cancel(struct tdb_context *tdb) +{ + int i; + enum TDB_ERROR ecode; + + if (tdb->transaction == NULL) { + tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_transaction_cancel: no transaction"); + return; + } + + if (tdb->transaction->nesting != 0) { + tdb->transaction->transaction_error = 1; + tdb->transaction->nesting--; + return; + } + + tdb->file->map_size = tdb->transaction->old_map_size; + + /* free all the transaction blocks */ + for (i=0;i<tdb->transaction->num_blocks;i++) { + if (tdb->transaction->blocks[i] != NULL) { + free(tdb->transaction->blocks[i]); + } + } + SAFE_FREE(tdb->transaction->blocks); + + if (tdb->transaction->magic_offset) { + const struct tdb_methods *methods = tdb->transaction->io_methods; + uint64_t invalid = TDB_RECOVERY_INVALID_MAGIC; + + /* remove the recovery marker */ + ecode = 
methods->twrite(tdb, tdb->transaction->magic_offset, + &invalid, sizeof(invalid)); + if (ecode == TDB_SUCCESS) + ecode = transaction_sync(tdb, + tdb->transaction->magic_offset, + sizeof(invalid)); + if (ecode != TDB_SUCCESS) { + tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_cancel: failed to remove" + " recovery magic"); + } + } + + if (tdb->file->allrecord_lock.count) + tdb_allrecord_unlock(tdb, tdb->file->allrecord_lock.ltype); + + /* restore the normal io methods */ + tdb->methods = tdb->transaction->io_methods; + + tdb_transaction_unlock(tdb, F_WRLCK); + + if (tdb_has_open_lock(tdb)) + tdb_unlock_open(tdb, F_WRLCK); + + SAFE_FREE(tdb->transaction); +} + +/* + start a tdb transaction. No token is returned, as only a single + transaction is allowed to be pending per tdb_context +*/ +enum TDB_ERROR tdb_transaction_start(struct tdb_context *tdb) +{ + enum TDB_ERROR ecode; + + tdb->stats.transactions++; + /* some sanity checks */ + if (tdb->read_only || (tdb->flags & TDB_INTERNAL)) { + return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_transaction_start:" + " cannot start a" + " transaction on a " + "read-only or internal db"); + } + + /* cope with nested tdb_transaction_start() calls */ + if (tdb->transaction != NULL) { + if (!(tdb->flags & TDB_ALLOW_NESTING)) { + return tdb->last_error + = tdb_logerr(tdb, TDB_ERR_IO, + TDB_LOG_USE_ERROR, + "tdb_transaction_start:" + " already inside transaction"); + } + tdb->transaction->nesting++; + tdb->stats.transaction_nest++; + return 0; + } + + if (tdb_has_hash_locks(tdb)) { + /* the caller must not have any locks when starting a + transaction as otherwise we'll be screwed by lack + of nested locks in POSIX */ + return tdb->last_error = tdb_logerr(tdb, TDB_ERR_LOCK, + TDB_LOG_USE_ERROR, + "tdb_transaction_start:" + " cannot start a" + " transaction with locks" + " held"); + } + + tdb->transaction = (struct tdb_transaction *) + calloc(sizeof(struct tdb_transaction), 1); + if 
(tdb->transaction == NULL) { + return tdb->last_error = tdb_logerr(tdb, TDB_ERR_OOM, + TDB_LOG_ERROR, + "tdb_transaction_start:" + " cannot allocate"); + } + + /* get the transaction write lock. This is a blocking lock. As + discussed with Volker, there are a number of ways we could + make this async, which we will probably do in the future */ + ecode = tdb_transaction_lock(tdb, F_WRLCK); + if (ecode != TDB_SUCCESS) { + SAFE_FREE(tdb->transaction->blocks); + SAFE_FREE(tdb->transaction); + return tdb->last_error = ecode; + } + + /* get a read lock over entire file. This is upgraded to a write + lock during the commit */ + ecode = tdb_allrecord_lock(tdb, F_RDLCK, TDB_LOCK_WAIT, true); + if (ecode != TDB_SUCCESS) { + goto fail_allrecord_lock; + } + + /* make sure we know about any file expansions already done by + anyone else */ + tdb->methods->oob(tdb, tdb->file->map_size + 1, true); + tdb->transaction->old_map_size = tdb->file->map_size; + + /* finally hook the io methods, replacing them with + transaction specific methods */ + tdb->transaction->io_methods = tdb->methods; + tdb->methods = &transaction_methods; + return tdb->last_error = TDB_SUCCESS; + +fail_allrecord_lock: + tdb_transaction_unlock(tdb, F_WRLCK); + SAFE_FREE(tdb->transaction->blocks); + SAFE_FREE(tdb->transaction); + return tdb->last_error = ecode; +} + + +/* + cancel the current transaction +*/ +void tdb_transaction_cancel(struct tdb_context *tdb) +{ + tdb->stats.transaction_cancel++; + _tdb_transaction_cancel(tdb); +} + +/* + work out how much space the linearised recovery data will consume (worst case) +*/ +static tdb_len_t tdb_recovery_size(struct tdb_context *tdb) +{ + tdb_len_t recovery_size = 0; + int i; + + recovery_size = 0; + for (i=0;i<tdb->transaction->num_blocks;i++) { + if (i * PAGESIZE >= tdb->transaction->old_map_size) { + break; + } + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + recovery_size += 2*sizeof(tdb_off_t); + if (i == tdb->transaction->num_blocks-1) { + 
/* Length of the common prefix of new[0..length) and old[0..length). */
static unsigned int same(const unsigned char *new,
			 const unsigned char *old,
			 unsigned int length)
{
	unsigned int n = 0;

	while (n < length && new[n] == old[n]) {
		n++;
	}
	return n;
}

/*
  Scan for the first run of at least min_same identical bytes.

  Returns how many leading bytes precede that run and stores the run's
  length in *samelen.  A run only counts once a differing byte (or the
  end of the buffers) terminates it; if no qualifying run exists the
  whole length is returned and *samelen is 0.
*/
static unsigned int different(const unsigned char *new,
			      const unsigned char *old,
			      unsigned int length,
			      unsigned int min_same,
			      unsigned int *samelen)
{
	unsigned int pos;
	unsigned int run = 0;

	for (pos = 0; pos < length; pos++) {
		if (new[pos] == old[pos]) {
			run++;
			continue;
		}
		if (run >= min_same) {
			/* a long enough identical stretch just ended here */
			*samelen = run;
			return pos - run;
		}
		run = 0;
	}

	/* a trailing run only counts if it is long enough */
	if (run < min_same) {
		run = 0;
	}
	*samelen = run;
	return length - run;
}
*/ +static struct tdb_recovery_record *alloc_recovery(struct tdb_context *tdb, + tdb_len_t *len) +{ + struct tdb_recovery_record *rec; + size_t i; + enum TDB_ERROR ecode; + unsigned char *p; + const struct tdb_methods *old_methods = tdb->methods; + + rec = malloc(sizeof(*rec) + tdb_recovery_size(tdb)); + if (!rec) { + tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "transaction_setup_recovery:" + " cannot allocate"); + return TDB_ERR_PTR(TDB_ERR_OOM); + } + + /* We temporarily revert to the old I/O methods, so we can use + * tdb_access_read */ + tdb->methods = tdb->transaction->io_methods; + + /* build the recovery data into a single blob to allow us to do a single + large write, which should be more efficient */ + p = (unsigned char *)(rec + 1); + for (i=0;i<tdb->transaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; + unsigned int off; + const unsigned char *buffer; + + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * PAGESIZE; + length = PAGESIZE; + if (i == tdb->transaction->num_blocks-1) { + length = tdb->transaction->last_block_size; + } + + if (offset >= tdb->transaction->old_map_size) { + continue; + } + + if (offset + length > tdb->file->map_size) { + ecode = tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_transaction_setup_recovery:" + " transaction data over new region" + " boundary"); + goto fail; + } + if (offset + length > tdb->transaction->old_map_size) { + /* Short read at EOF. */ + length = tdb->transaction->old_map_size - offset; + } + buffer = tdb_access_read(tdb, offset, length, false); + if (TDB_PTR_IS_ERR(buffer)) { + ecode = TDB_PTR_ERR(buffer); + goto fail; + } + + /* Skip over anything the same at the start. 
*/ + off = same(tdb->transaction->blocks[i], buffer, length); + offset += off; + + while (off < length) { + tdb_len_t len; + unsigned int samelen; + + len = different(tdb->transaction->blocks[i] + off, + buffer + off, length - off, + sizeof(offset) + sizeof(len) + 1, + &samelen); + + memcpy(p, &offset, sizeof(offset)); + memcpy(p + sizeof(offset), &len, sizeof(len)); + tdb_convert(tdb, p, sizeof(offset) + sizeof(len)); + p += sizeof(offset) + sizeof(len); + memcpy(p, buffer + off, len); + p += len; + off += len + samelen; + offset += len + samelen; + } + tdb_access_release(tdb, buffer); + } + + *len = p - (unsigned char *)(rec + 1); + tdb->methods = old_methods; + return rec; + +fail: + free(rec); + tdb->methods = old_methods; + return TDB_ERR_PTR(ecode); +} + +static tdb_off_t create_recovery_area(struct tdb_context *tdb, + tdb_len_t rec_length, + struct tdb_recovery_record *rec) +{ + tdb_off_t off, recovery_off; + tdb_len_t addition; + enum TDB_ERROR ecode; + const struct tdb_methods *methods = tdb->transaction->io_methods; + + /* round up to a multiple of page size. Overallocate, since each + * such allocation forces us to expand the file. */ + rec->max_len + = (((sizeof(*rec) + rec_length + rec_length / 2) + + PAGESIZE-1) & ~(PAGESIZE-1)) + - sizeof(*rec); + off = tdb->file->map_size; + + /* Restore ->map_size before calling underlying expand_file. 
+ Also so that we don't try to expand the file again in the + transaction commit, which would destroy the recovery + area */ + addition = (tdb->file->map_size - tdb->transaction->old_map_size) + + sizeof(*rec) + rec->max_len; + tdb->file->map_size = tdb->transaction->old_map_size; + tdb->stats.transaction_expand_file++; + ecode = methods->expand_file(tdb, addition); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_recovery_allocate:" + " failed to create recovery area"); + } + + /* we have to reset the old map size so that we don't try to + expand the file again in the transaction commit, which + would destroy the recovery area */ + tdb->transaction->old_map_size = tdb->file->map_size; + + /* write the recovery header offset and sync - we can sync without a race here + as the magic ptr in the recovery record has not been set */ + recovery_off = off; + tdb_convert(tdb, &recovery_off, sizeof(recovery_off)); + ecode = methods->twrite(tdb, offsetof(struct tdb_header, recovery), + &recovery_off, sizeof(tdb_off_t)); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_recovery_allocate:" + " failed to write recovery head"); + } + transaction_write_existing(tdb, offsetof(struct tdb_header, recovery), + &recovery_off, + sizeof(tdb_off_t)); + return off; +} + +/* + setup the recovery data that will be used on a crash during commit +*/ +static enum TDB_ERROR transaction_setup_recovery(struct tdb_context *tdb) +{ + tdb_len_t recovery_size = 0; + tdb_off_t recovery_off = 0; + tdb_off_t old_map_size = tdb->transaction->old_map_size; + struct tdb_recovery_record *recovery; + const struct tdb_methods *methods = tdb->transaction->io_methods; + uint64_t magic; + enum TDB_ERROR ecode; + + recovery = alloc_recovery(tdb, &recovery_size); + if (TDB_PTR_IS_ERR(recovery)) + return TDB_PTR_ERR(recovery); + + ecode = tdb_recovery_area(tdb, methods, &recovery_off, recovery); + if (ecode) { + free(recovery); + return ecode; + 
} + + if (recovery->max_len < recovery_size) { + /* Not large enough. Free up old recovery area. */ + if (recovery_off) { + tdb->stats.frees++; + ecode = add_free_record(tdb, recovery_off, + sizeof(*recovery) + + recovery->max_len, + TDB_LOCK_WAIT, true); + free(recovery); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_recovery_allocate:" + " failed to free previous" + " recovery area"); + } + + /* Refresh recovery after add_free_record above. */ + recovery = alloc_recovery(tdb, &recovery_size); + if (TDB_PTR_IS_ERR(recovery)) + return TDB_PTR_ERR(recovery); + } + + recovery_off = create_recovery_area(tdb, recovery_size, + recovery); + if (TDB_OFF_IS_ERR(recovery_off)) { + free(recovery); + return recovery_off; + } + } + + /* Now we know size, convert rec header. */ + recovery->magic = TDB_RECOVERY_INVALID_MAGIC; + recovery->len = recovery_size; + recovery->eof = old_map_size; + tdb_convert(tdb, recovery, sizeof(*recovery)); + + /* write the recovery data to the recovery area */ + ecode = methods->twrite(tdb, recovery_off, recovery, recovery_size); + if (ecode != TDB_SUCCESS) { + free(recovery); + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_setup_recovery:" + " failed to write recovery data"); + } + transaction_write_existing(tdb, recovery_off, recovery, recovery_size); + + free(recovery); + + /* as we don't have ordered writes, we have to sync the recovery + data before we update the magic to indicate that the recovery + data is present */ + ecode = transaction_sync(tdb, recovery_off, recovery_size); + if (ecode != TDB_SUCCESS) + return ecode; + + magic = TDB_RECOVERY_MAGIC; + tdb_convert(tdb, &magic, sizeof(magic)); + + tdb->transaction->magic_offset + = recovery_off + offsetof(struct tdb_recovery_record, magic); + + ecode = methods->twrite(tdb, tdb->transaction->magic_offset, + &magic, sizeof(magic)); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + 
"tdb_transaction_setup_recovery:" + " failed to write recovery magic"); + } + transaction_write_existing(tdb, tdb->transaction->magic_offset, + &magic, sizeof(magic)); + + /* ensure the recovery magic marker is on disk */ + return transaction_sync(tdb, tdb->transaction->magic_offset, + sizeof(magic)); +} + +static enum TDB_ERROR _tdb_transaction_prepare_commit(struct tdb_context *tdb) +{ + const struct tdb_methods *methods; + enum TDB_ERROR ecode; + + if (tdb->transaction == NULL) { + return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_transaction_prepare_commit:" + " no transaction"); + } + + if (tdb->transaction->prepared) { + _tdb_transaction_cancel(tdb); + return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_USE_ERROR, + "tdb_transaction_prepare_commit:" + " transaction already prepared"); + } + + if (tdb->transaction->transaction_error) { + _tdb_transaction_cancel(tdb); + return tdb_logerr(tdb, TDB_ERR_EINVAL, TDB_LOG_ERROR, + "tdb_transaction_prepare_commit:" + " transaction error pending"); + } + + + if (tdb->transaction->nesting != 0) { + return TDB_SUCCESS; + } + + /* check for a null transaction */ + if (tdb->transaction->blocks == NULL) { + return TDB_SUCCESS; + } + + methods = tdb->transaction->io_methods; + + /* upgrade the main transaction lock region to a write lock */ + ecode = tdb_allrecord_upgrade(tdb); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* get the open lock - this prevents new users attaching to the database + during the commit */ + ecode = tdb_lock_open(tdb, F_WRLCK, TDB_LOCK_WAIT|TDB_LOCK_NOCHECK); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + /* Since we have whole db locked, we don't need the expansion lock. */ + if (!(tdb->flags & TDB_NOSYNC)) { + /* Sets up tdb->transaction->recovery and + * tdb->transaction->magic_offset. 
*/ + ecode = transaction_setup_recovery(tdb); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + + tdb->transaction->prepared = true; + + /* expand the file to the new size if needed */ + if (tdb->file->map_size != tdb->transaction->old_map_size) { + tdb_len_t add; + + add = tdb->file->map_size - tdb->transaction->old_map_size; + /* Restore original map size for tdb_expand_file */ + tdb->file->map_size = tdb->transaction->old_map_size; + ecode = methods->expand_file(tdb, add); + if (ecode != TDB_SUCCESS) { + return ecode; + } + } + + /* Keep the open lock until the actual commit */ + return TDB_SUCCESS; +} + +/* + prepare to commit the current transaction +*/ +enum TDB_ERROR tdb_transaction_prepare_commit(struct tdb_context *tdb) +{ + return _tdb_transaction_prepare_commit(tdb); +} + +/* + commit the current transaction +*/ +enum TDB_ERROR tdb_transaction_commit(struct tdb_context *tdb) +{ + const struct tdb_methods *methods; + int i; + enum TDB_ERROR ecode; + + if (tdb->transaction == NULL) { + return tdb->last_error = tdb_logerr(tdb, TDB_ERR_EINVAL, + TDB_LOG_USE_ERROR, + "tdb_transaction_commit:" + " no transaction"); + } + + tdb_trace(tdb, "tdb_transaction_commit"); + + if (tdb->transaction->nesting != 0) { + tdb->transaction->nesting--; + return tdb->last_error = TDB_SUCCESS; + } + + /* check for a null transaction */ + if (tdb->transaction->blocks == NULL) { + _tdb_transaction_cancel(tdb); + return tdb->last_error = TDB_SUCCESS; + } + + if (!tdb->transaction->prepared) { + ecode = _tdb_transaction_prepare_commit(tdb); + if (ecode != TDB_SUCCESS) { + _tdb_transaction_cancel(tdb); + return tdb->last_error = ecode; + } + } + + methods = tdb->transaction->io_methods; + + /* perform all the writes */ + for (i=0;i<tdb->transaction->num_blocks;i++) { + tdb_off_t offset; + tdb_len_t length; + + if (tdb->transaction->blocks[i] == NULL) { + continue; + } + + offset = i * PAGESIZE; + length = PAGESIZE; + if (i == tdb->transaction->num_blocks-1) { + length = 
tdb->transaction->last_block_size; + } + + ecode = methods->twrite(tdb, offset, + tdb->transaction->blocks[i], length); + if (ecode != TDB_SUCCESS) { + /* we've overwritten part of the data and + possibly expanded the file, so we need to + run the crash recovery code */ + tdb->methods = methods; + tdb_transaction_recover(tdb); + + _tdb_transaction_cancel(tdb); + + return tdb->last_error = ecode; + } + SAFE_FREE(tdb->transaction->blocks[i]); + } + + SAFE_FREE(tdb->transaction->blocks); + tdb->transaction->num_blocks = 0; + + /* ensure the new data is on disk */ + ecode = transaction_sync(tdb, 0, tdb->file->map_size); + if (ecode != TDB_SUCCESS) { + return tdb->last_error = ecode; + } + + /* + TODO: maybe write to some dummy hdr field, or write to magic + offset without mmap, before the last sync, instead of the + utime() call + */ + + /* on some systems (like Linux 2.6.x) changes via mmap/msync + don't change the mtime of the file, this means the file may + not be backed up (as tdb rounding to block sizes means that + file size changes are quite rare too). The following forces + mtime changes when a transaction completes */ +#if HAVE_UTIME + utime(tdb->name, NULL); +#endif + + /* use a transaction cancel to free memory and remove the + transaction locks: it "restores" map_size, too. */ + tdb->transaction->old_map_size = tdb->file->map_size; + _tdb_transaction_cancel(tdb); + + return tdb->last_error = TDB_SUCCESS; +} + + +/* + recover from an aborted transaction. 
Must be called with exclusive + database write access already established (including the open + lock to prevent new processes attaching) +*/ +enum TDB_ERROR tdb_transaction_recover(struct tdb_context *tdb) +{ + tdb_off_t recovery_head, recovery_eof; + unsigned char *data, *p; + struct tdb_recovery_record rec; + enum TDB_ERROR ecode; + + /* find the recovery area */ + recovery_head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery)); + if (TDB_OFF_IS_ERR(recovery_head)) { + return tdb_logerr(tdb, recovery_head, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to read recovery head"); + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return TDB_SUCCESS; + } + + /* read the recovery record */ + ecode = tdb_read_convert(tdb, recovery_head, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to read recovery record"); + } + + if (rec.magic != TDB_RECOVERY_MAGIC) { + /* there is no valid recovery data */ + return TDB_SUCCESS; + } + + if (tdb->read_only) { + return tdb_logerr(tdb, TDB_ERR_CORRUPT, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " attempt to recover read only database"); + } + + recovery_eof = rec.eof; + + data = (unsigned char *)malloc(rec.len); + if (data == NULL) { + return tdb_logerr(tdb, TDB_ERR_OOM, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to allocate recovery data"); + } + + /* read the full recovery data */ + ecode = tdb->methods->tread(tdb, recovery_head + sizeof(rec), data, + rec.len); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to read recovery data"); + } + + /* recover the file data */ + p = data; + while (p+sizeof(tdb_off_t)+sizeof(tdb_len_t) < data + rec.len) { + tdb_off_t ofs; + tdb_len_t len; + tdb_convert(tdb, p, sizeof(ofs) + sizeof(len)); + memcpy(&ofs, p, sizeof(ofs)); + memcpy(&len, p + sizeof(ofs), sizeof(len)); + p += 
sizeof(ofs) + sizeof(len); + + ecode = tdb->methods->twrite(tdb, ofs, p, len); + if (ecode != TDB_SUCCESS) { + free(data); + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to recover %zu bytes" + " at offset %zu", + (size_t)len, (size_t)ofs); + } + p += len; + } + + free(data); + + ecode = transaction_sync(tdb, 0, tdb->file->map_size); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to sync recovery"); + } + + /* if the recovery area is after the recovered eof then remove it */ + if (recovery_eof <= recovery_head) { + ecode = tdb_write_off(tdb, offsetof(struct tdb_header, + recovery), + 0); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to remove recovery head"); + } + } + + /* remove the recovery magic */ + ecode = tdb_write_off(tdb, + recovery_head + + offsetof(struct tdb_recovery_record, magic), + TDB_RECOVERY_INVALID_MAGIC); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to remove recovery magic"); + } + + ecode = transaction_sync(tdb, 0, recovery_eof); + if (ecode != TDB_SUCCESS) { + return tdb_logerr(tdb, ecode, TDB_LOG_ERROR, + "tdb_transaction_recover:" + " failed to sync2 recovery"); + } + + tdb_logerr(tdb, TDB_SUCCESS, TDB_LOG_WARNING, + "tdb_transaction_recover: recovered %zu byte database", + (size_t)recovery_eof); + + /* all done */ + return TDB_SUCCESS; +} + +tdb_bool_err tdb_needs_recovery(struct tdb_context *tdb) +{ + tdb_off_t recovery_head; + struct tdb_recovery_record rec; + enum TDB_ERROR ecode; + + /* find the recovery area */ + recovery_head = tdb_read_off(tdb, offsetof(struct tdb_header,recovery)); + if (TDB_OFF_IS_ERR(recovery_head)) { + return recovery_head; + } + + if (recovery_head == 0) { + /* we have never allocated a recovery record */ + return false; + } + + /* read the recovery record */ + 
ecode = tdb_read_convert(tdb, recovery_head, &rec, sizeof(rec)); + if (ecode != TDB_SUCCESS) { + return ecode; + } + + return (rec.magic == TDB_RECOVERY_MAGIC); +} diff --git a/lib/tdb2/traverse.c b/lib/tdb2/traverse.c new file mode 100644 index 0000000000..179e095142 --- /dev/null +++ b/lib/tdb2/traverse.c @@ -0,0 +1,99 @@ + /* + Trivial Database 2: traverse function. + Copyright (C) Rusty Russell 2010 + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+*/ +#include "private.h" +#include <ccan/likely/likely.h> + +int64_t tdb_traverse_(struct tdb_context *tdb, + int (*fn)(struct tdb_context *, + TDB_DATA, TDB_DATA, void *), + void *p) +{ + enum TDB_ERROR ecode; + struct traverse_info tinfo; + struct tdb_data k, d; + int64_t count = 0; + + k.dptr = NULL; + for (ecode = first_in_hash(tdb, &tinfo, &k, &d.dsize); + ecode == TDB_SUCCESS; + ecode = next_in_hash(tdb, &tinfo, &k, &d.dsize)) { + d.dptr = k.dptr + k.dsize; + + count++; + if (fn && fn(tdb, k, d, p)) { + free(k.dptr); + tdb->last_error = TDB_SUCCESS; + return count; + } + free(k.dptr); + } + + if (ecode != TDB_ERR_NOEXIST) { + return tdb->last_error = ecode; + } + tdb->last_error = TDB_SUCCESS; + return count; +} + +enum TDB_ERROR tdb_firstkey(struct tdb_context *tdb, struct tdb_data *key) +{ + struct traverse_info tinfo; + + return tdb->last_error = first_in_hash(tdb, &tinfo, key, NULL); +} + +/* We lock twice, not very efficient. We could keep last key & tinfo cached. */ +enum TDB_ERROR tdb_nextkey(struct tdb_context *tdb, struct tdb_data *key) +{ + struct traverse_info tinfo; + struct hash_info h; + struct tdb_used_record rec; + + tinfo.prev = find_and_lock(tdb, *key, F_RDLCK, &h, &rec, &tinfo); + free(key->dptr); + if (TDB_OFF_IS_ERR(tinfo.prev)) { + return tdb->last_error = tinfo.prev; + } + tdb_unlock_hashes(tdb, h.hlock_start, h.hlock_range, F_RDLCK); + + return tdb->last_error = next_in_hash(tdb, &tinfo, key, NULL); +} + +static int wipe_one(struct tdb_context *tdb, + TDB_DATA key, TDB_DATA data, enum TDB_ERROR *ecode) +{ + *ecode = tdb_delete(tdb, key); + return (*ecode != TDB_SUCCESS); +} + +enum TDB_ERROR tdb_wipe_all(struct tdb_context *tdb) +{ + enum TDB_ERROR ecode; + int64_t count; + + ecode = tdb_allrecord_lock(tdb, F_WRLCK, TDB_LOCK_WAIT, false); + if (ecode != TDB_SUCCESS) + return tdb->last_error = ecode; + + /* FIXME: Be smarter. 
*/ + count = tdb_traverse(tdb, wipe_one, &ecode); + if (count < 0) + ecode = count; + tdb_allrecord_unlock(tdb, F_WRLCK); + return tdb->last_error = ecode; +} diff --git a/lib/tdb2/wscript b/lib/tdb2/wscript new file mode 100644 index 0000000000..386768f0fc --- /dev/null +++ b/lib/tdb2/wscript @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +APPNAME = 'tdb' +VERSION = '2.0-alpha' + +blddir = 'bin' + +import sys, os + +# find the buildtools directory +srcdir = '.' +while not os.path.exists(srcdir+'/buildtools') and len(srcdir.split('/')) < 5: + srcdir = '../' + srcdir +sys.path.insert(0, srcdir + '/buildtools/wafsamba') + +import wafsamba, samba_dist, Options, Logs + +samba_dist.DIST_DIRS('lib/tdb2:. lib/replace:lib/replace buildtools:buildtools') + +def set_options(opt): + opt.BUILTIN_DEFAULT('replace') + opt.PRIVATE_EXTENSION_DEFAULT('tdb2', noextension='tdb2') + opt.RECURSE('lib/replace') + opt.add_option('--enable-tdb2-breaks-compat', + help=("Build tdb2 instead of tdb1 (BREAKS TDB1!) [False]"), + action="store_true", dest='BUILD_TDB2', default=False) + if opt.IN_LAUNCH_DIR(): + opt.add_option('--disable-python', + help=("disable the pytdb module"), + action="store_true", dest='disable_python', default=False) + +def configure(conf): + if conf.env.BUILD_TDB2: + conf.DEFINE('BUILD_TDB2', 1) + conf.RECURSE('lib/replace') + conf.RECURSE('lib/ccan') + + conf.env.standalone_tdb2 = conf.IN_LAUNCH_DIR() + conf.env.disable_python = getattr(Options.options, 'disable_python', False) + +# if not conf.env.standalone_tdb2: +# if conf.CHECK_BUNDLED_SYSTEM('tdb', minversion=VERSION, +# implied_deps='replace'): +# conf.define('USING_SYSTEM_TDB2', 1) + + conf.SAMBA_CONFIG_H() + +def build(bld): + if bld.env.BUILD_TDB2: + bld.RECURSE('lib/replace') + + if bld.env.standalone_tdb2: + bld.env.PKGCONFIGDIR = '${LIBDIR}/pkgconfig' + bld.PKG_CONFIG_FILES('tdb2.pc', vnum=VERSION) + bld.INSTALL_FILES('${INCLUDEDIR}', 'tdb2.h', flat=True) + private_library = False + else: + private_library = 
True + + if not bld.CONFIG_SET('USING_SYSTEM_TDB2'): + # FIXME: hide_symbols=True, abi_directory='ABI', abi_match='tdb_*', vnum=VERSION, + bld.SAMBA_LIBRARY('tdb', + '''check.c free.c hash.c io.c lock.c open.c + summary.c tdb.c transaction.c traverse.c''', + deps='replace ccan', + private_library=private_library) + + bld.SAMBA_BINARY('tdb2torture', + 'tools/tdb2torture.c', + 'tdb', + install=False) + + bld.SAMBA_BINARY('tdb2tool', + 'tools/tdb2tool.c', + 'tdb') + + bld.SAMBA_BINARY('tdb2dump', + 'tools/tdb2dump.c', + 'tdb') + + bld.SAMBA_BINARY('tdb2restore', + 'tools/tdb2restore.c', + 'tdb') + + bld.SAMBA_PYTHON('pytdb', + 'pytdb.c', + deps='tdb', + enabled=not bld.env.disable_python, + realname='tdb.so', + cflags='-DPACKAGE_VERSION=\"%s\"' % VERSION) + +def dist(): + '''makes a tarball for distribution''' + samba_dist.dist() + +def reconfigure(ctx): + '''reconfigure if config scripts have changed''' + import samba_utils + samba_utils.reconfigure(ctx) diff --git a/lib/tdb_compat/tdb_compat.c b/lib/tdb_compat/tdb_compat.c new file mode 100644 index 0000000000..a9173fc33d --- /dev/null +++ b/lib/tdb_compat/tdb_compat.c @@ -0,0 +1,102 @@ +#include <tdb_compat.h> + +/* Note: for the moment, we only need this file for TDB2, so we can + * assume waf. */ +#if BUILD_TDB2 +TDB_DATA tdb_null = { NULL, 0 }; + +/* Proxy which sets waitflag to false so we never block. */ +static int lock_nonblock(int fd, int rw, off_t off, off_t len, bool waitflag, + void *_orig) +{ + struct tdb_attribute_flock *orig = _orig; + + return orig->lock(fd, rw, off, len, false, orig->data); +} + +enum TDB_ERROR tdb_transaction_start_nonblock(struct tdb_context *tdb) +{ + union tdb_attribute locking, orig; + enum TDB_ERROR ecode; + + orig.base.attr = TDB_ATTRIBUTE_FLOCK; + ecode = tdb_get_attribute(tdb, &orig); + if (ecode != TDB_SUCCESS) + return ecode; + + /* Replace locking function with our own. 
*/ + locking = orig; + locking.flock.data = &orig; + locking.flock.lock = lock_nonblock; + + ecode = tdb_set_attribute(tdb, &locking); + if (ecode != TDB_SUCCESS) + return ecode; + + ecode = tdb_transaction_start(tdb); + tdb_unset_attribute(tdb, TDB_ATTRIBUTE_FLOCK); + return ecode; +} + +/* + * This handles TDB_CLEAR_IF_FIRST. + */ +static enum TDB_ERROR clear_if_first(int fd, void *unused) +{ + /* We hold a lock offset 63 always, so we can tell if anyone else is. */ + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 63; + fl.l_len = 1; + + if (fcntl(fd, F_SETLK, &fl) == 0) { + /* We must be first ones to open it w/ TDB_CLEAR_IF_FIRST! */ + if (ftruncate(fd, 0) != 0) { + return TDB_ERR_IO; + } + } + fl.l_type = F_RDLCK; + if (fcntl(fd, F_SETLKW, &fl) != 0) { + return TDB_ERR_IO; + } + return TDB_SUCCESS; +} + +struct tdb_context * +tdb_open_compat_(const char *name, int hash_size_unused, + int tdb_flags, int open_flags, mode_t mode, + void (*log_fn)(struct tdb_context *, + enum tdb_log_level, + const char *message, + void *data), + void *log_data) +{ + union tdb_attribute cif, log, *attr = NULL; + + if (log_fn) { + log.log.base.attr = TDB_ATTRIBUTE_LOG; + log.log.base.next = NULL; + log.log.fn = log_fn; + log.log.data = log_data; + attr = &log; + } + + if (tdb_flags & TDB_CLEAR_IF_FIRST) { + cif.openhook.base.attr = TDB_ATTRIBUTE_OPENHOOK; + cif.openhook.base.next = attr; + cif.openhook.fn = clear_if_first; + attr = &cif; + tdb_flags &= ~TDB_CLEAR_IF_FIRST; + } + + /* Testsuite uses this to speed things up. */ + if (getenv("TDB_NO_FSYNC")) { + tdb_flags |= TDB_NOSYNC; + } + + return tdb_open(name, tdb_flags|TDB_ALLOW_NESTING, open_flags, mode, + attr); +} +#endif diff --git a/lib/tdb_compat/tdb_compat.h b/lib/tdb_compat/tdb_compat.h new file mode 100644 index 0000000000..ea401cba49 --- /dev/null +++ b/lib/tdb_compat/tdb_compat.h @@ -0,0 +1,136 @@ +/* + Unix SMB/CIFS implementation. + + Compatibility layer for TDB1 vs TDB2. 
+ + Copyright (C) Rusty Russell 2011 + + ** NOTE! The following LGPL license applies to the tdb_compat + ** library. This does NOT imply that all of Samba is released + ** under the LGPL + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see <http://www.gnu.org/licenses/>. +*/ +#ifndef TDB_COMPAT_H +#define TDB_COMPAT_H + +#include "replace.h" +#include <ccan/typesafe_cb/typesafe_cb.h> +#if BUILD_TDB2 +#include <tdb2.h> +#include <fcntl.h> +#include <unistd.h> + +extern TDB_DATA tdb_null; + +/* Old-style tdb_fetch. */ +static inline TDB_DATA tdb_fetch_compat(struct tdb_context *tdb, TDB_DATA k) +{ + TDB_DATA dbuf; + if (tdb_fetch(tdb, k, &dbuf) != TDB_SUCCESS) { + return tdb_null; + } + return dbuf; +} + +static inline TDB_DATA tdb_firstkey_compat(struct tdb_context *tdb) +{ + TDB_DATA k; + if (tdb_firstkey(tdb, &k) != TDB_SUCCESS) { + return tdb_null; + } + return k; +} + +/* Note: this frees the old key.dptr. */ +static inline TDB_DATA tdb_nextkey_compat(struct tdb_context *tdb, TDB_DATA k) +{ + if (tdb_nextkey(tdb, &k) != TDB_SUCCESS) { + return tdb_null; + } + return k; +} + +/* tdb_traverse_read and tdb_traverse are equal: both only take read locks. */ +#define tdb_traverse_read tdb_traverse + +/* Old-style tdb_errorstr */ +#define tdb_errorstr_compat(tdb) tdb_errorstr(tdb_error(tdb)) + +/* This typedef doesn't exist in TDB2. 
*/ +typedef struct tdb_context TDB_CONTEXT; + +/* We don't need these any more. */ +#define tdb_reopen_all(flag) 0 +#define tdb_reopen(tdb) 0 + +/* These no longer exist in tdb2. */ +#define TDB_CLEAR_IF_FIRST 1048576 +#define TDB_INCOMPATIBLE_HASH 0 +#define TDB_VOLATILE 0 + +/* tdb2 does nonblocking functions via attibutes. */ +enum TDB_ERROR tdb_transaction_start_nonblock(struct tdb_context *tdb); + +/* Convenient (typesafe) wrapper for tdb open with logging */ +#define tdb_open_compat(name, hsize, tdb_fl, open_fl, mode, log_fn, log_data) \ + tdb_open_compat_((name), (hsize), (tdb_fl), (open_fl), (mode), \ + typesafe_cb_preargs(void, void *, \ + (log_fn), (log_data), \ + struct tdb_context *, \ + enum tdb_log_level, \ + const char *), \ + (log_data)) + +struct tdb_context * +tdb_open_compat_(const char *name, int hash_size_unused, + int tdb_flags, int open_flags, mode_t mode, + void (*log_fn)(struct tdb_context *, + enum tdb_log_level, + const char *message, + void *data), + void *log_data); +#else +#include <tdb.h> + +/* FIXME: Inlining this is a bit lazy, but eases S3 build. */ +static inline struct tdb_context * +tdb_open_compat(const char *name, int hash_size, + int tdb_flags, int open_flags, mode_t mode, + tdb_log_func log_fn, void *log_private) +{ + struct tdb_logging_context lctx; + lctx.log_fn = log_fn; + lctx.log_private = log_private; + + if (log_fn) + return tdb_open_ex(name, hash_size, tdb_flags, open_flags, + mode, &lctx, NULL); + else + return tdb_open(name, hash_size, tdb_flags, open_flags, mode); +} + +#define tdb_firstkey_compat tdb_firstkey +/* Note: this frees the old key.dptr. 
*/ +static inline TDB_DATA tdb_nextkey_compat(struct tdb_context *tdb, TDB_DATA k) +{ + TDB_DATA next = tdb_nextkey(tdb, k); + free(k.dptr); + return next; +} +#define tdb_errorstr_compat(tdb) tdb_errorstr(tdb) +#define tdb_fetch_compat tdb_fetch +#endif + +#endif /* TDB_COMPAT_H */ diff --git a/lib/tdb_compat/wscript b/lib/tdb_compat/wscript new file mode 100644 index 0000000000..574e67e8ef --- /dev/null +++ b/lib/tdb_compat/wscript @@ -0,0 +1,27 @@ +#!/usr/bin/env python + +import Options + +def set_options(opt): + opt.RECURSE('lib/tdb2') + opt.RECURSE('lib/tdb') + +def configure(conf): + conf.env.BUILD_TDB2 = getattr(Options.options, 'BUILD_TDB2', False) + + if conf.env.BUILD_TDB2: + conf.RECURSE('lib/tdb2') + else: + conf.RECURSE('lib/tdb') + conf.RECURSE('lib/ccan') + +def build(bld): + bld.RECURSE('lib/ccan') + if bld.env.BUILD_TDB2: + bld.RECURSE('lib/tdb2') + else: + bld.RECURSE('lib/tdb') + bld.SAMBA_LIBRARY('tdb_compat', + source='tdb_compat.c', + deps='replace tdb ccan', + private_library=True) diff --git a/lib/tevent/ABI/tevent-0.9.12.sigs b/lib/tevent/ABI/tevent-0.9.12.sigs new file mode 100644 index 0000000000..df9b08dfd5 --- /dev/null +++ b/lib/tevent/ABI/tevent-0.9.12.sigs @@ -0,0 +1,74 @@ +_tevent_add_fd: struct tevent_fd *(struct tevent_context *, TALLOC_CTX *, int, uint16_t, tevent_fd_handler_t, void *, const char *, const char *) +_tevent_add_signal: struct tevent_signal *(struct tevent_context *, TALLOC_CTX *, int, int, tevent_signal_handler_t, void *, const char *, const char *) +_tevent_add_timer: struct tevent_timer *(struct tevent_context *, TALLOC_CTX *, struct timeval, tevent_timer_handler_t, void *, const char *, const char *) +_tevent_create_immediate: struct tevent_immediate *(TALLOC_CTX *, const char *) +_tevent_loop_once: int (struct tevent_context *, const char *) +_tevent_loop_until: int (struct tevent_context *, bool (*)(void *), void *, const char *) +_tevent_loop_wait: int (struct tevent_context *, const char *) 
+_tevent_queue_create: struct tevent_queue *(TALLOC_CTX *, const char *, const char *) +_tevent_req_callback_data: void *(struct tevent_req *) +_tevent_req_cancel: bool (struct tevent_req *, const char *) +_tevent_req_create: struct tevent_req *(TALLOC_CTX *, void *, size_t, const char *, const char *) +_tevent_req_data: void *(struct tevent_req *) +_tevent_req_done: void (struct tevent_req *, const char *) +_tevent_req_error: bool (struct tevent_req *, uint64_t, const char *) +_tevent_req_nomem: bool (const void *, struct tevent_req *, const char *) +_tevent_req_notify_callback: void (struct tevent_req *, const char *) +_tevent_req_oom: void (struct tevent_req *, const char *) +_tevent_schedule_immediate: void (struct tevent_immediate *, struct tevent_context *, tevent_immediate_handler_t, void *, const char *, const char *) +tevent_backend_list: const char **(TALLOC_CTX *) +tevent_cleanup_pending_signal_handlers: void (struct tevent_signal *) +tevent_common_add_fd: struct tevent_fd *(struct tevent_context *, TALLOC_CTX *, int, uint16_t, tevent_fd_handler_t, void *, const char *, const char *) +tevent_common_add_signal: struct tevent_signal *(struct tevent_context *, TALLOC_CTX *, int, int, tevent_signal_handler_t, void *, const char *, const char *) +tevent_common_add_timer: struct tevent_timer *(struct tevent_context *, TALLOC_CTX *, struct timeval, tevent_timer_handler_t, void *, const char *, const char *) +tevent_common_check_signal: int (struct tevent_context *) +tevent_common_context_destructor: int (struct tevent_context *) +tevent_common_fd_destructor: int (struct tevent_fd *) +tevent_common_fd_get_flags: uint16_t (struct tevent_fd *) +tevent_common_fd_set_close_fn: void (struct tevent_fd *, tevent_fd_close_fn_t) +tevent_common_fd_set_flags: void (struct tevent_fd *, uint16_t) +tevent_common_loop_immediate: bool (struct tevent_context *) +tevent_common_loop_timer_delay: struct timeval (struct tevent_context *) +tevent_common_loop_wait: int (struct 
tevent_context *, const char *) +tevent_common_schedule_immediate: void (struct tevent_immediate *, struct tevent_context *, tevent_immediate_handler_t, void *, const char *, const char *) +tevent_context_init: struct tevent_context *(TALLOC_CTX *) +tevent_context_init_byname: struct tevent_context *(TALLOC_CTX *, const char *) +tevent_debug: void (struct tevent_context *, enum tevent_debug_level, const char *, ...) +tevent_fd_get_flags: uint16_t (struct tevent_fd *) +tevent_fd_set_auto_close: void (struct tevent_fd *) +tevent_fd_set_close_fn: void (struct tevent_fd *, tevent_fd_close_fn_t) +tevent_fd_set_flags: void (struct tevent_fd *, uint16_t) +tevent_loop_allow_nesting: void (struct tevent_context *) +tevent_loop_set_nesting_hook: void (struct tevent_context *, tevent_nesting_hook, void *) +tevent_queue_add: bool (struct tevent_queue *, struct tevent_context *, struct tevent_req *, tevent_queue_trigger_fn_t, void *) +tevent_queue_length: size_t (struct tevent_queue *) +tevent_queue_start: void (struct tevent_queue *) +tevent_queue_stop: void (struct tevent_queue *) +tevent_re_initialise: int (struct tevent_context *) +tevent_register_backend: bool (const char *, const struct tevent_ops *) +tevent_req_default_print: char *(struct tevent_req *, TALLOC_CTX *) +tevent_req_is_error: bool (struct tevent_req *, enum tevent_req_state *, uint64_t *) +tevent_req_is_in_progress: bool (struct tevent_req *) +tevent_req_poll: bool (struct tevent_req *, struct tevent_context *) +tevent_req_post: struct tevent_req *(struct tevent_req *, struct tevent_context *) +tevent_req_print: char *(TALLOC_CTX *, struct tevent_req *) +tevent_req_received: void (struct tevent_req *) +tevent_req_set_callback: void (struct tevent_req *, tevent_req_fn, void *) +tevent_req_set_cancel_fn: void (struct tevent_req *, tevent_req_cancel_fn) +tevent_req_set_endtime: bool (struct tevent_req *, struct tevent_context *, struct timeval) +tevent_req_set_print_fn: void (struct tevent_req *, 
tevent_req_print_fn) +tevent_set_abort_fn: void (void (*)(const char *)) +tevent_set_debug: int (struct tevent_context *, void (*)(void *, enum tevent_debug_level, const char *, va_list), void *) +tevent_set_debug_stderr: int (struct tevent_context *) +tevent_set_default_backend: void (const char *) +tevent_signal_support: bool (struct tevent_context *) +tevent_timeval_add: struct timeval (const struct timeval *, uint32_t, uint32_t) +tevent_timeval_compare: int (const struct timeval *, const struct timeval *) +tevent_timeval_current: struct timeval (void) +tevent_timeval_current_ofs: struct timeval (uint32_t, uint32_t) +tevent_timeval_is_zero: bool (const struct timeval *) +tevent_timeval_set: struct timeval (uint32_t, uint32_t) +tevent_timeval_until: struct timeval (const struct timeval *, const struct timeval *) +tevent_timeval_zero: struct timeval (void) +tevent_wakeup_recv: bool (struct tevent_req *) +tevent_wakeup_send: struct tevent_req *(TALLOC_CTX *, struct tevent_context *, struct timeval) diff --git a/lib/tevent/tevent.h b/lib/tevent/tevent.h index 665c491ebb..8204a28fbe 100644 --- a/lib/tevent/tevent.h +++ b/lib/tevent/tevent.h @@ -136,7 +136,7 @@ struct tevent_context *tevent_context_init_byname(TALLOC_CTX *mem_ctx, const cha const char **tevent_backend_list(TALLOC_CTX *mem_ctx); /** - * @brief Set the default tevent backent. + * @brief Set the default tevent backend. * * @param[in] backend The name of the backend to set. */ @@ -995,6 +995,20 @@ bool _tevent_req_nomem(const void *p, _tevent_req_nomem(p, req, __location__) #endif +#ifdef DOXYGEN +/** + * @brief Indicate out of memory to a request + * + * @param[in] req The request being processed. + */ +void tevent_req_oom(struct tevent_req *req); +#else +void _tevent_req_oom(struct tevent_req *req, + const char *location); +#define tevent_req_oom(req) \ + _tevent_req_oom(req, __location__) +#endif + /** * @brief Finish a request before the caller had the change to set the callback. 
* @@ -1218,7 +1232,7 @@ struct timeval tevent_timeval_current(void); * * @param[in] secs The seconds to set. * - * @param[in] usecs The milliseconds to set. + * @param[in] usecs The microseconds to set. * * @return A timeval structure with the given values. */ @@ -1253,7 +1267,7 @@ bool tevent_timeval_is_zero(const struct timeval *tv); * * @param[in] secs The seconds to add to the timeval. * - * @param[in] usecs The milliseconds to add to the timeval. + * @param[in] usecs The microseconds to add to the timeval. * * @return The timeval structure with the new time. */ @@ -1265,7 +1279,7 @@ struct timeval tevent_timeval_add(const struct timeval *tv, uint32_t secs, * * @param[in] secs The seconds of the offset from now. * - * @param[in] usecs The milliseconds of the offset from now. + * @param[in] usecs The microseconds of the offset from now. * * @return A timval with the given offset in the future. */ diff --git a/lib/tevent/tevent_poll.c b/lib/tevent/tevent_poll.c index 712255b373..0b782e99bb 100644 --- a/lib/tevent/tevent_poll.c +++ b/lib/tevent/tevent_poll.c @@ -233,7 +233,19 @@ static int poll_event_loop_poll(struct tevent_context *ev, pfd = &poll_ev->fds[pfd_idx]; - if (pfd->revents & (POLLIN|POLLHUP|POLLERR)) { + if (pfd->revents & (POLLHUP|POLLERR)) { + /* If we only wait for TEVENT_FD_WRITE, we + should not tell the event handler about it, + and remove the writable flag, as we only + report errors when waiting for read events + to match the select behavior. 
*/ + if (!(fde->flags & TEVENT_FD_READ)) { + TEVENT_FD_NOT_WRITEABLE(fde); + continue; + } + flags |= TEVENT_FD_READ; + } + if (pfd->revents & POLLIN) { flags |= TEVENT_FD_READ; } if (pfd->revents & POLLOUT) { diff --git a/lib/tevent/tevent_req.c b/lib/tevent/tevent_req.c index b0c9c57dde..92697b7df9 100644 --- a/lib/tevent/tevent_req.c +++ b/lib/tevent/tevent_req.c @@ -123,6 +123,11 @@ bool _tevent_req_error(struct tevent_req *req, return true; } +void _tevent_req_oom(struct tevent_req *req, const char *location) +{ + tevent_req_finish(req, TEVENT_REQ_NO_MEMORY, location); +} + bool _tevent_req_nomem(const void *p, struct tevent_req *req, const char *location) @@ -130,7 +135,7 @@ bool _tevent_req_nomem(const void *p, if (p != NULL) { return false; } - tevent_req_finish(req, TEVENT_REQ_NO_MEMORY, location); + _tevent_req_oom(req, location); return true; } diff --git a/lib/tevent/wscript b/lib/tevent/wscript index 75c44c2962..5dcd18814b 100644 --- a/lib/tevent/wscript +++ b/lib/tevent/wscript @@ -1,7 +1,7 @@ #!/usr/bin/env python APPNAME = 'tevent' -VERSION = '0.9.11' +VERSION = '0.9.12' blddir = 'bin' diff --git a/lib/torture/torture.c b/lib/torture/torture.c index a0b35bfe7c..cee6bdb934 100644 --- a/lib/torture/torture.c +++ b/lib/torture/torture.c @@ -88,7 +88,7 @@ _PUBLIC_ NTSTATUS torture_temp_dir(struct torture_context *tctx, NT_STATUS_HAVE_NO_MEMORY(*tempdir); if (mkdtemp(*tempdir) == NULL) { - return map_nt_error_from_unix(errno); + return map_nt_error_from_unix_common(errno); } return NT_STATUS_OK; @@ -154,7 +154,7 @@ _PUBLIC_ NTSTATUS torture_deltree_outputdir(struct torture_context *tctx) if (local_deltree(tctx->outputdir) == -1) { if (errno != 0) { - return map_nt_error_from_unix(errno); + return map_nt_error_from_unix_common(errno); } return NT_STATUS_UNSUCCESSFUL; } diff --git a/lib/tsocket/tsocket_helpers.c b/lib/tsocket/tsocket_helpers.c index 3a41a3efc3..db6b6148e9 100644 --- a/lib/tsocket/tsocket_helpers.c +++ b/lib/tsocket/tsocket_helpers.c @@ 
-73,7 +73,7 @@ struct tevent_req *tdgram_sendto_queue_send(TALLOC_CTX *mem_ctx, tdgram_sendto_queue_trigger, NULL); if (!ok) { - tevent_req_nomem(NULL, req); + tevent_req_oom(req); goto post; } @@ -346,7 +346,7 @@ struct tevent_req *tstream_readv_pdu_queue_send(TALLOC_CTX *mem_ctx, tstream_readv_pdu_queue_trigger, NULL); if (!ok) { - tevent_req_nomem(NULL, req); + tevent_req_oom(req); goto post; } @@ -453,7 +453,7 @@ struct tevent_req *tstream_writev_queue_send(TALLOC_CTX *mem_ctx, tstream_writev_queue_trigger, NULL); if (!ok) { - tevent_req_nomem(NULL, req); + tevent_req_oom(req); goto post; } diff --git a/lib/util/asn1.c b/lib/util/asn1.c index b716da63c0..c23bf65b8d 100644 --- a/lib/util/asn1.c +++ b/lib/util/asn1.c @@ -885,10 +885,19 @@ bool asn1_read_ContextSimple(struct asn1_data *data, uint8_t num, DATA_BLOB *blo bool asn1_read_implicit_Integer(struct asn1_data *data, int *i) { uint8_t b; + bool first_byte = true; *i = 0; while (!data->has_error && asn1_tag_remaining(data)>0) { if (!asn1_read_uint8(data, &b)) return false; + if (first_byte) { + if (b & 0x80) { + /* Number is negative. + Set i to -1 for sign extend. 
*/ + *i = -1; + } + first_byte = false; + } *i = (*i << 8) + b; } return !data->has_error; diff --git a/lib/util/byteorder.h b/lib/util/byteorder.h index 59ad8371e4..6bcf71e83b 100644 --- a/lib/util/byteorder.h +++ b/lib/util/byteorder.h @@ -201,18 +201,29 @@ static __inline__ void st_le32(uint32_t *addr, const uint32_t val) #endif /* not CAREFUL_ALIGNMENT */ +/* 64 bit macros */ +#define BVAL(p, ofs) (IVAL(p,ofs) | (((uint64_t)IVAL(p,(ofs)+4)) << 32)) +#define BVALS(p, ofs) ((int64_t)BVAL(p,ofs)) +#define SBVAL(p, ofs, v) (SIVAL(p,ofs,(v)&0xFFFFFFFF), SIVAL(p,(ofs)+4,((uint64_t)(v))>>32)) +#define SBVALS(p, ofs, v) (SBVAL(p,ofs,(uint64_t)v)) + /* now the reverse routines - these are used in nmb packets (mostly) */ #define SREV(x) ((((x)&0xFF)<<8) | (((x)>>8)&0xFF)) #define IREV(x) ((SREV(x)<<16) | (SREV((x)>>16))) +#define BREV(x) ((IREV(x)<<32) | (IREV((x)>>32))) #define RSVAL(buf,pos) SREV(SVAL(buf,pos)) #define RSVALS(buf,pos) SREV(SVALS(buf,pos)) #define RIVAL(buf,pos) IREV(IVAL(buf,pos)) #define RIVALS(buf,pos) IREV(IVALS(buf,pos)) +#define RBVAL(buf,pos) BREV(BVAL(buf,pos)) +#define RBVALS(buf,pos) BREV(BVALS(buf,pos)) #define RSSVAL(buf,pos,val) SSVAL(buf,pos,SREV(val)) #define RSSVALS(buf,pos,val) SSVALS(buf,pos,SREV(val)) #define RSIVAL(buf,pos,val) SIVAL(buf,pos,IREV(val)) #define RSIVALS(buf,pos,val) SIVALS(buf,pos,IREV(val)) +#define RSBVAL(buf,pos,val) SBVAL(buf,pos,BREV(val)) +#define RSBVALS(buf,pos,val) SBVALS(buf,pos,BREV(val)) /* Alignment macros. 
*/ #define ALIGN4(p,base) ((p) + ((4 - (PTR_DIFF((p), (base)) & 3)) & 3)) @@ -222,10 +233,4 @@ static __inline__ void st_le32(uint32_t *addr, const uint32_t val) /* macros for accessing SMB protocol elements */ #define VWV(vwv) ((vwv)*2) -/* 64 bit macros */ -#define BVAL(p, ofs) (IVAL(p,ofs) | (((uint64_t)IVAL(p,(ofs)+4)) << 32)) -#define BVALS(p, ofs) ((int64_t)BVAL(p,ofs)) -#define SBVAL(p, ofs, v) (SIVAL(p,ofs,(v)&0xFFFFFFFF), SIVAL(p,(ofs)+4,((uint64_t)(v))>>32)) -#define SBVALS(p, ofs, v) (SBVAL(p,ofs,(uint64_t)v)) - #endif /* _BYTEORDER_H */ diff --git a/lib/util/charset/CP437.c b/lib/util/charset/CP437.c new file mode 100644 index 0000000000..1e478d678f --- /dev/null +++ b/lib/util/charset/CP437.c @@ -0,0 +1,135 @@ +/* + * Conversion table for CP437 charset also known as IBM437 + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM437 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, + 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, + 0x03A6, 
0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, + 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00c9, -32 }, + { 0x00d1, 0x00ff, -39 }, + { 0x0192, 0x0192, -185 }, + { 0x0393, 0x0398, -697 }, + { 0x03a3, 0x03a9, -707 }, + { 0x03b1, 0x03b5, -714 }, + { 0x03c0, 0x03c6, -724 }, + { 0x207f, 0x207f, -8076 }, + { 0x20a7, 0x20a7, -8115 }, + { 0x2219, 0x221e, -8484 }, + { 0x2229, 0x2229, -8494 }, + { 0x2248, 0x2248, -8524 }, + { 0x2261, 0x2265, -8548 }, + { 0x2310, 0x2310, -8718 }, + { 0x2320, 0x2321, -8733 }, + { 0x2500, 0x2502, -9211 }, + { 0x250c, 0x251c, -9220 }, + { 0x2524, 0x2524, -9227 }, + { 0x252c, 0x252c, -9234 }, + { 0x2534, 0x2534, -9241 }, + { 0x253c, 0x253c, -9248 }, + { 0x2550, 0x256c, -9267 }, + { 0x2580, 0x2593, -9286 }, + { 0x25a0, 0x25a0, -9298 }, + { 0xffff, 0xffff, 0 } +}; + +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0x9b, 0x9c, 0x00, 0x9d, 0x00, 0x00, + 0x00, 0x00, 0xa6, 0xae, 0xaa, 0x00, 0x00, 0x00, + 0xf8, 0xf1, 0xfd, 0x00, 0x00, 0xe6, 0x00, 0xfa, + 0x00, 0x00, 
0xa7, 0xaf, 0xac, 0xab, 0x00, 0xa8, + 0x00, 0x00, 0x00, 0x00, 0x8e, 0x8f, 0x92, 0x80, + 0x00, 0x90, 0xa5, 0x00, 0x00, 0x00, 0x00, 0x99, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x00, 0x00, + 0xe1, 0x85, 0xa0, 0x83, 0x00, 0x84, 0x86, 0x91, + 0x87, 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, + 0x8b, 0x00, 0xa4, 0x95, 0xa2, 0x93, 0x00, 0x94, + 0xf6, 0x00, 0x97, 0xa3, 0x96, 0x81, 0x00, 0x00, + 0x98, 0x9f, 0xe2, 0x00, 0x00, 0x00, 0x00, 0xe9, + 0xe4, 0x00, 0x00, 0xe8, 0x00, 0x00, 0xea, 0xe0, + 0x00, 0x00, 0xeb, 0xee, 0xe3, 0x00, 0x00, 0xe5, + 0xe7, 0x00, 0xed, 0xfc, 0x9e, 0xf9, 0xfb, 0x00, + 0x00, 0x00, 0xec, 0xef, 0xf7, 0xf0, 0x00, 0x00, + 0xf3, 0xf2, 0xa9, 0xf4, 0xf5, 0xc4, 0x00, 0xb3, + 0xda, 0x00, 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, + 0xc0, 0x00, 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, + 0xc3, 0xb4, 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0xd5, + 0xd6, 0xc9, 0xb8, 0xb7, 0xbb, 0xd4, 0xd3, 0xc8, + 0xbe, 0xbd, 0xbc, 0xc6, 0xc7, 0xcc, 0xb5, 0xb6, + 0xb9, 0xd1, 0xd2, 0xcb, 0xcf, 0xd0, 0xca, 0xd8, + 0xd7, 0xce, 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, + 0x00, 0x00, 0xdb, 0x00, 0x00, 0x00, 0xdd, 0x00, + 0x00, 0x00, 0xde, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP437) diff --git a/lib/util/charset/CP850.c b/lib/util/charset/CP850.c new file mode 100644 index 0000000000..87a76f4cdf --- /dev/null +++ b/lib/util/charset/CP850.c @@ -0,0 +1,121 @@ +/* + * Conversion table for CP850 charset also known as IBM850. + * + * Copyright (C) Alexander Bokovoy 2003 + * + * Conversion tables are generated using GNU libc 2.2.5's + * localedata/charmaps/IBM850 table and source/script/gen-8bit-gap.sh script + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "includes.h" + +static const uint16_t to_ucs2[256] = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, + 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, + 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, + 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192, + 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, + 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0, + 0x00A9, 
0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4, + 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE, + 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580, + 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE, + 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4, + 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8, + 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0, +}; + +static const struct charset_gap_table from_idx[] = { + /* start, end, idx */ + { 0x0000, 0x007f, 0 }, + { 0x00a0, 0x00ff, -32 }, + { 0x0131, 0x0131, -81 }, + { 0x0192, 0x0192, -177 }, + { 0x2017, 0x2017, -7989 }, + { 0x2500, 0x2502, -9245 }, + { 0x250c, 0x251c, -9254 }, + { 0x2524, 0x2524, -9261 }, + { 0x252c, 0x252c, -9268 }, + { 0x2534, 0x2534, -9275 }, + { 0x253c, 0x253c, -9282 }, + { 0x2550, 0x256c, -9301 }, + { 0x2580, 0x2588, -9320 }, + { 0x2591, 0x2593, -9328 }, + { 0x25a0, 0x25a0, -9340 }, + { 0xffff, 0xffff, 0 } +}; +static const unsigned char from_ucs2[] = { + + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0xff, 0xad, 0xbd, 0x9c, 0xcf, 
0xbe, 0xdd, 0xf5, + 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, + 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, + 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, + 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, + 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, + 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, + 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, + 0x85, 0xa0, 0x83, 0xc6, 0x84, 0x86, 0x91, 0x87, + 0x8a, 0x82, 0x88, 0x89, 0x8d, 0xa1, 0x8c, 0x8b, + 0xd0, 0xa4, 0x95, 0xa2, 0x93, 0xe4, 0x94, 0xf6, + 0x9b, 0x97, 0xa3, 0x96, 0x81, 0xec, 0xe7, 0x98, + 0xd5, 0x9f, 0xf2, 0xc4, 0x00, 0xb3, 0xda, 0x00, + 0x00, 0x00, 0xbf, 0x00, 0x00, 0x00, 0xc0, 0x00, + 0x00, 0x00, 0xd9, 0x00, 0x00, 0x00, 0xc3, 0xb4, + 0xc2, 0xc1, 0xc5, 0xcd, 0xba, 0x00, 0x00, 0xc9, + 0x00, 0x00, 0xbb, 0x00, 0x00, 0xc8, 0x00, 0x00, + 0xbc, 0x00, 0x00, 0xcc, 0x00, 0x00, 0xb9, 0x00, + 0x00, 0xcb, 0x00, 0x00, 0xca, 0x00, 0x00, 0xce, + 0xdf, 0x00, 0x00, 0x00, 0xdc, 0x00, 0x00, 0x00, + 0xdb, 0xb0, 0xb1, 0xb2, 0xfe, +}; + +SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CP850) + diff --git a/lib/util/charset/charcnv.c b/lib/util/charset/charcnv.c index 998bb08fd7..076795a0b2 100644 --- a/lib/util/charset/charcnv.c +++ b/lib/util/charset/charcnv.c @@ -113,138 +113,3 @@ convert: return destlen; } - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @returns the number of bytes occupied in the destination - * on error, returns -1, and sets errno - **/ -_PUBLIC_ bool convert_string_error_handle(struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - size_t i_len, o_len; - ssize_t retval; - const char* inbuf = (const char*)src; - char* 
outbuf = (char*)dest; - smb_iconv_t descriptor; - - if (srclen == (size_t)-1) - srclen = strlen(inbuf)+1; - - descriptor = get_conv_handle(ic, from, to); - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - if (converted_size) { - *converted_size = 0; - } - errno = EINVAL; - return -1; - } - - i_len=srclen; - o_len=destlen; - - retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len); - - if (converted_size != NULL) - *converted_size = destlen-o_len; - return (retval != (ssize_t)-1); -} - - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @returns the number of bytes occupied in the destination - **/ -_PUBLIC_ bool convert_string_handle(struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, size_t *converted_size) -{ - bool retval; - - retval = convert_string_error_handle(ic, from, to, src, srclen, dest, destlen, converted_size); - if(retval==false) { - const char *reason; - switch(errno) { - case EINVAL: - reason="Incomplete multibyte sequence"; - return false; - case E2BIG: - reason="No more room"; - if (from == CH_UNIX) { - DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d - '%s'\n", - charset_name(ic, from), charset_name(ic, to), - (int)srclen, (int)destlen, - (const char *)src)); - } else { - DEBUG(0,("E2BIG: convert_string_handle(%s,%s): srclen=%d destlen=%d\n", - charset_name(ic, from), charset_name(ic, to), - (int)srclen, (int)destlen)); - } - return false; - case EILSEQ: - reason="Illegal multibyte sequence"; - return false; - default: - return false; - } - } - return true; -} - -/** - * Convert between character sets, allocating a new buffer using talloc for the 
result. - * - * @param srclen length of source buffer. - * @param dest always set at least to NULL - * @note -1 is not accepted for srclen. - * - * @returns Size in bytes of the converted string; or -1 in case of error. - **/ - -_PUBLIC_ bool convert_string_talloc_handle(TALLOC_CTX *ctx, - struct smb_iconv_handle *ic, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dst, size_t *converted_size) -{ - void **dest = (void **)dst; - smb_iconv_t descriptor; - ssize_t ret; - - *dest = NULL; - - if (src == NULL || srclen == (size_t)-1 || srclen == 0) - return false; - - descriptor = get_conv_handle(ic, from, to); - - if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { - /* conversion not supported, return -1*/ - DEBUG(3, ("convert_string_talloc_handle: conversion from %s to %s not supported!\n", - charset_name(ic, from), - charset_name(ic, to))); - return false; - } - - ret = iconv_talloc(ctx, descriptor, src, srclen, dest); - if (ret == -1) - return false; - if (converted_size != NULL) - *converted_size = ret; - return true; -} - diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 1078035592..b36c461003 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -28,7 +28,7 @@ #include <talloc.h> /* this defines the charset types used in samba */ -typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; +typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; #define NUM_CHARSETS 7 @@ -105,11 +105,6 @@ typedef struct smb_iconv_s { struct loadparm_context; struct smb_iconv_handle; -/* replace some string functions with multi-byte - versions */ -#define strlower(s) strlower_m(s) -#define strupper(s) strupper_m(s) - char *strchr_m(const char *s, char c); /** * Calculate the number of units (8 or 16-bit, depending on the @@ -137,8 +132,6 @@ int strcasecmp_m_handle(struct smb_iconv_handle 
*iconv_handle, const char *s1, const char *s2); int strcasecmp_m(const char *s1, const char *s2); size_t count_chars_m(const char *s, char c); -void strupper_m(char *s); -void strlower_m(char *s); char *strupper_talloc(TALLOC_CTX *ctx, const char *src); char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src); char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle, @@ -155,6 +148,7 @@ bool strhasupper_handle(struct smb_iconv_handle *ic, const char *string); char *strrchr_m(const char *s, char c); char *strchr_m(const char *s, char c); +char *strstr_m(const char *src, const char *findstr); bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size); @@ -188,8 +182,7 @@ extern struct smb_iconv_handle *global_iconv_handle; struct smb_iconv_handle *get_iconv_handle(void); struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx, const char *dos_charset, - const char *unix_charset, - const char *display_charset); + const char *unix_charset); smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic, charset_t from, charset_t to); const char *charset_name(struct smb_iconv_handle *ic, charset_t ch); @@ -218,7 +211,6 @@ int codepoint_cmpi(codepoint_t c1, codepoint_t c2); struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, const char *dos_charset, const char *unix_charset, - const char *display_charset, bool native_iconv, struct smb_iconv_handle *old_ic); @@ -285,7 +277,7 @@ static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytes int i; \ int done = 0; \ \ - uint16 ch = SVAL(*inbuf,0); \ + uint16_t ch = SVAL(*inbuf,0); \ \ for (i=0; from_idx[i].start != 0xffff; i++) { \ if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \ diff --git a/lib/util/charset/charset_macosxfs.c b/lib/util/charset/charset_macosxfs.c new file mode 100644 index 0000000000..4d2ba5b6ff --- 
/dev/null +++ b/lib/util/charset/charset_macosxfs.c @@ -0,0 +1,605 @@ +/* + Unix SMB/CIFS implementation. + Samba charset module for Mac OS X/Darwin + Copyright (C) Benjamin Riefenstahl 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* + * modules/charset_macosxfs.c + * + * A Samba charset module to use on Mac OS X/Darwin as the filesystem + * and display encoding. + * + * Actually two implementations are provided here. The default + * implementation is based on the official CFString API. The other is + * based on internal CFString APIs as defined in the OpenDarwin + * source. + */ + +#include "includes.h" +#undef realloc + +/* + * Include OS frameworks. These are only needed in this module. + */ +#include <CoreFoundation/CFString.h> + +/* + * See if autoconf has found us the internal headers in some form. + */ +#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H +# include <CoreFoundation/CFStringEncodingConverter.h> +# include <CoreFoundation/CFUnicodePrecomposition.h> +# define USE_INTERNAL_API 1 +#elif HAVE_CFSTRINGENCODINGCONVERTER_H +# include <CFStringEncodingConverter.h> +# include <CFUnicodePrecomposition.h> +# define USE_INTERNAL_API 1 +#endif + +/* + * Compile time configuration: Do we want debug output? + */ +/* #define DEBUG_STRINGS 1 */ + +/* + * A simple, but efficient memory provider for our buffers. 
+ */ +static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize) +{ + if (newsize > *size) { + *size = newsize + 128; + buffer = realloc(buffer, *size); + } + return buffer; +} + +/* + * While there is a version of OpenDarwin for intel, the usual case is + * big-endian PPC. So we need byte swapping to handle the + * little-endian byte order of the network protocol. We also need an + * additional dynamic buffer to do this work for incoming data blocks, + * because we have to consider the original data as constant. + * + * We abstract the differences away by providing a simple facade with + * these functions/macros: + * + * le_to_native(dst,src,len) + * native_to_le(cp,len) + * set_ucbuffer_with_le(buffer,bufsize,data,size) + * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve) + */ +#ifdef WORDS_BIGENDIAN + +static inline void swap_bytes (char * dst, const char * src, size_t len) +{ + const char *srcend = src + len; + while (src < srcend) { + dst[0] = src[1]; + dst[1] = src[0]; + dst += 2; + src += 2; + } +} +static inline void swap_bytes_inplace (char * cp, size_t len) +{ + char temp; + char *end = cp + len; + while (cp < end) { + temp = cp[1]; + cp[1] = cp[0]; + cp[0] = temp; + cp += 2; + } +} + +#define le_to_native(dst,src,len) swap_bytes(dst,src,len) +#define native_to_le(cp,len) swap_bytes_inplace(cp,len) +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0) + +#else /* ! 
WORDS_BIGENDIAN */ + +#define le_to_native(dst,src,len) memcpy(dst,src,len) +#define native_to_le(cp,len) /* nothing */ +#define set_ucbuffer_with_le(buffer,bufsize,data,size) \ + (((void)(bufsize)),(UniChar*)(data)) + +#endif + +static inline UniChar *set_ucbuffer_with_le_copy ( + UniChar *buffer, size_t *bufsize, + const void *data, size_t size, size_t reserve) +{ + buffer = resize_buffer(buffer, bufsize, size+reserve); + le_to_native((char*)buffer,data,size); + return buffer; +} + + +/* + * A simple hexdump function for debugging error conditions. + */ +#define debug_out(s) DEBUG(0,(s)) + +#ifdef DEBUG_STRINGS + +static void hexdump( const char * label, const char * s, size_t len ) +{ + size_t restlen = len; + debug_out("<<<<<<<\n"); + debug_out(label); + debug_out("\n"); + while (restlen > 0) { + char line[100]; + size_t i, j; + char * d = line; +#undef sprintf + d += sprintf(d, "%04X ", (unsigned)(len-restlen)); + *d++ = ' '; + for( i = 0; i<restlen && i<8; ++i ) { + d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); + } + for( j = i; j<8; ++j ) { + d += sprintf(d, " "); + } + *d++ = ' '; + for( i = 8; i<restlen && i<16; ++i ) { + d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF); + } + for( j = i; j<16; ++j ) { + d += sprintf(d, " "); + } + *d++ = ' '; + for( i = 0; i<restlen && i<16; ++i ) { + if(s[i] < ' ' || s[i] >= 0x7F || !isprint(s[i])) + *d++ = '.'; + else + *d++ = s[i]; + } + *d++ = '\n'; + *d = 0; + restlen -= i; + s += i; + debug_out(line); + } + debug_out(">>>>>>>\n"); +} + +#else /* !DEBUG_STRINGS */ + +#define hexdump(label,s,len) /* nothing */ + +#endif + + +#if !USE_INTERNAL_API + +/* + * An implementation based on documented Mac OS X APIs. + * + * This does a certain amount of memory management, creating and + * manipulating CFString objects. We try to minimize the impact by + * keeping those objects around and re-using them. We also use + * external backing store for the CFStrings where this is possible and + * benficial. 
+ * + * The Unicode normalization forms available at this level are + * generic, not specifically for the file system. So they may not be + * perfect fits. + */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + size_t outsize; + CFRange range; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + if (NULL == cfstring) { + /* + * A version with an external backing store as in the + * push function should have been more efficient, but + * testing shows, that it is actually slower (!). + * Maybe kCFAllocatorDefault gets shortcut evaluation + * internally, while kCFAllocatorNull doesn't. + */ + cfstring = CFStringCreateMutable(kCFAllocatorDefault,0); + } + + /* + * Three methods of appending to a CFString, choose the most + * efficient. + */ + if (0 == (*inbuf)[*inbytesleft-1]) { + CFStringAppendCString(cfstring, *inbuf, script_code); + } else if (*inbytesleft <= 255) { + Str255 buffer; + buffer[0] = *inbytesleft; + memcpy(buffer+1, *inbuf, buffer[0]); + CFStringAppendPascalString(cfstring, buffer, script_code); + } else { + /* + * We would like to use a fixed buffer and a loop + * here, but then we can't guarantee that the input is + * well-formed UTF-8, as we are supposed to do. + */ + static char *buffer = NULL; + static size_t buflen = 0; + buffer = resize_buffer(buffer, &buflen, *inbytesleft+1); + memcpy(buffer, *inbuf, *inbytesleft); + buffer[*inbytesleft] = 0; + CFStringAppendCString(cfstring, buffer, script_code); /* the NUL-terminated copy; *inbuf is unterminated here */ + } + + /* + * Compose characters, using the non-canonical composition + * form. 
+ */ + CFStringNormalize(cfstring, kCFStringNormalizationFormC); + + outsize = CFStringGetLength(cfstring); + range = CFRangeMake(0,outsize); + + if (outsize == 0) { + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * pathname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "An unknown error occurred\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; /* Not sure, but this is what we have + * actually seen. */ + return -1; + } + if (outsize*2 > *outbytesleft) { + CFStringDelete(cfstring, range); + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf); + CFStringDelete(cfstring, range); + + native_to_le(*outbuf, outsize*2); + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now. 
+ */ + if (0 == (*inbuf)[*inbytesleft-1] && + (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) { + + if ((outsize*2+2) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0; + outsize += 1; /* outsize counts UTF-16 units, and the terminator is one unit (two bytes) */ + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize*2; + *outbytesleft -= outsize*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static CFMutableStringRef cfstring = NULL; + static UniChar *buffer = NULL; + static size_t buflen = 0; + CFIndex outsize, cfsize, charsconverted; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + /* + * We need a buffer that can hold 4 times the original data, + * because that is the theoretical maximum that decomposition + * can create currently (in Unicode 4.0). + */ + buffer = set_ucbuffer_with_le_copy( + buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft); + + if (NULL == cfstring) { + cfstring = CFStringCreateMutableWithExternalCharactersNoCopy( + kCFAllocatorDefault, + buffer, *inbytesleft/2, buflen/2, + kCFAllocatorNull); + } else { + CFStringSetExternalCharactersNoCopy( + cfstring, + buffer, *inbytesleft/2, buflen/2); + } + + /* + * Decompose characters, using the non-canonical decomposition + * form. + * + * NB: This isn't exactly what HFS+ wants (see note on + * kCFStringEncodingUseHFSPlusCanonical in + * CFStringEncodingConverter.h), but AFAIK it's the best that + * the official API can do. 
+ */ + CFStringNormalize(cfstring, kCFStringNormalizationFormD); + + cfsize = CFStringGetLength(cfstring); + charsconverted = CFStringGetBytes( + cfstring, CFRangeMake(0,cfsize), + script_code, 0, false, + *outbuf, *outbytesleft, &outsize); + + if (0 == charsconverted) { + debug_out("String conversion: " + "Buffer too small or not convertable\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = EILSEQ; /* Probably more likely. */ + return -1; + } + + /* + * Add a converted null byte, if the CFString conversions + * prevented that until now (needs >= 2 input bytes to test for a UTF-16 NUL). + */ + if (*inbytesleft >= 2 && 0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] && + (0 != (*outbuf)[outsize-1])) { + + if (((size_t)outsize+1) > *outbytesleft) { + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (old) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + } + + (*outbuf)[outsize] = 0; + ++outsize; + } + + *inbuf += *inbytesleft; + *inbytesleft = 0; + *outbuf += outsize; + *outbytesleft -= outsize; + + return 0; +} + +#else /* USE_INTERNAL_API */ + +/* + * An implementation based on internal code as known from the + * OpenDarwin CVS. + * + * This code doesn't need much memory management because it uses + * functions that operate on the raw memory directly. + * + * The push routine here is faster and more compatible with HFS+ than + * the other implementation above. The pull routine is only faster + * for some strings, slightly slower for others. The pull routine + * loses because it has to iterate over the data twice, once to + * decode UTF-8 and then to do the character composition required by + * Windows. 
+ */ +static size_t macosxfs_encoding_pull( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* Script string */ + char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */ +{ + static const int script_code = kCFStringEncodingUTF8; + UInt32 srcCharsUsed = 0; + UInt32 dstCharsUsed = 0; + UInt32 result; + uint32_t dstDecomposedUsed = 0; + uint32_t dstPrecomposedUsed = 0; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + result = CFStringEncodingBytesToUnicode( + script_code, kCFStringEncodingComposeCombinings, + *inbuf, *inbytesleft, &srcCharsUsed, + (UniChar*)*outbuf, *outbytesleft/2, &dstCharsUsed); /* capacity is counted in UniChars, not bytes */ + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/mangle_hash2.c:is_legal_name() expects + * errors here. That function will always pass 2 + * characters. smbd/open.c:check_for_pipe() cuts a + * pathname to 10 characters blindly. Suppress the + * debug output in those cases. + */ + if(2 != *inbytesleft && 10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF8->UTF16LE (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + /* + * It doesn't look like CFStringEncodingBytesToUnicode() can + * produce precomposed characters (flags=ComposeCombinings + * doesn't do it), so we need another pass over the data here. + * We can do this in-place, as the string can only get + * shorter. 
+ * + * (Actually in theory there should be an internal + * decomposition and reordering before the actual composition + * step. But we should be able to rely on that we always get + * fully decomposed strings for input, so this can't create + * problems in reality.) + */ + CFUniCharPrecompose( + (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed, + (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed); + + native_to_le(*outbuf, dstPrecomposedUsed*2); + + *inbuf += srcCharsUsed; + *inbytesleft -= srcCharsUsed; + *outbuf += dstPrecomposedUsed*2; + *outbytesleft -= dstPrecomposedUsed*2; + + return 0; +} + +static size_t macosxfs_encoding_push( + void *cd, /* Encoder handle */ + char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */ + char **outbuf, size_t *outbytesleft) /* Script string */ +{ + static const int script_code = kCFStringEncodingUTF8; + static UniChar *buffer = NULL; + static size_t buflen = 0; + UInt32 srcCharsUsed=0, dstCharsUsed=0, result; + + (void) cd; /* UNUSED */ + + if (0 == *inbytesleft) { + return 0; + } + + buffer = set_ucbuffer_with_le( + buffer, &buflen, *inbuf, *inbytesleft); + + result = CFStringEncodingUnicodeToBytes( + script_code, kCFStringEncodingUseHFSPlusCanonical, + buffer, *inbytesleft/2, &srcCharsUsed, + *outbuf, *outbytesleft, &dstCharsUsed); + + switch(result) { + case kCFStringEncodingConversionSuccess: + if (*inbytesleft/2 == srcCharsUsed) + break; + else + ; /*fall through*/ + case kCFStringEncodingInsufficientOutputBufferLength: + debug_out("String conversion: " + "Output buffer too small\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = E2BIG; + return -1; + case kCFStringEncodingInvalidInputStream: + /* + * HACK: smbd/open.c:check_for_pipe():is_legal_name() + * cuts a pathname to 10 characters blindly. Suppress + * the debug output in those cases. 
+ */ + if(10 != *inbytesleft) { + debug_out("String conversion: " + "Invalid input sequence\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + } + errno = EILSEQ; + return -1; + case kCFStringEncodingConverterUnavailable: + debug_out("String conversion: " + "Unknown encoding\n"); + hexdump("UTF16LE->UTF8 (new) input", + *inbuf, *inbytesleft); + errno = EINVAL; + return -1; + } + + *inbuf += srcCharsUsed*2; + *inbytesleft -= srcCharsUsed*2; + *outbuf += dstCharsUsed; + *outbytesleft -= dstCharsUsed; + + return 0; +} + +#endif /* USE_INTERNAL_API */ + +/* + * For initialization, actually install the encoding as "macosxfs". + */ +static struct charset_functions macosxfs_encoding_functions = { + "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push +}; + +NTSTATUS charset_macosxfs_init(void) +{ + if (!smb_register_charset(&macosxfs_encoding_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} + +/* eof */ diff --git a/lib/util/charset/codepoints.c b/lib/util/charset/codepoints.c index cd54420e8e..8cc33a9782 100644 --- a/lib/util/charset/codepoints.c +++ b/lib/util/charset/codepoints.c @@ -23,7 +23,7 @@ #include "includes.h" #include "lib/util/charset/charset.h" #include "system/locale.h" -#include "dynconfig.h" +#include "dynconfig/dynconfig.h" #ifdef strcasecmp #undef strcasecmp @@ -168,17 +168,16 @@ struct smb_iconv_handle *get_iconv_handle(void) { if (global_iconv_handle == NULL) global_iconv_handle = smb_iconv_handle_reinit(talloc_autofree_context(), - "ASCII", "UTF-8", "ASCII", true, NULL); + "ASCII", "UTF-8", true, NULL); return global_iconv_handle; } struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx, const char *dos_charset, - const char *unix_charset, - const char *display_charset) + const char *unix_charset) { return smb_iconv_handle_reinit(mem_ctx, - dos_charset, unix_charset, display_charset, true, NULL); + dos_charset, unix_charset, true, NULL); } /** @@ -190,7 +189,6 @@ const char 
*charset_name(struct smb_iconv_handle *ic, charset_t ch) case CH_UTF16: return "UTF-16LE"; case CH_UNIX: return ic->unix_charset; case CH_DOS: return ic->dos_charset; - case CH_DISPLAY: return ic->display_charset; case CH_UTF8: return "UTF8"; case CH_UTF16BE: return "UTF-16BE"; case CH_UTF16MUNGED: return "UTF16_MUNGED"; @@ -219,37 +217,6 @@ static int close_iconv_handle(struct smb_iconv_handle *data) return 0; } -static const char *map_locale(const char *charset) -{ - if (strcmp(charset, "LOCALE") != 0) { - return charset; - } -#if defined(HAVE_NL_LANGINFO) && defined(CODESET) - { - const char *ln; - smb_iconv_t handle; - - ln = nl_langinfo(CODESET); - if (ln == NULL) { - DEBUG(1,("Unable to determine charset for LOCALE - using ASCII\n")); - return "ASCII"; - } - /* Check whether the charset name is supported - by iconv */ - handle = smb_iconv_open(ln, "UCS-2LE"); - if (handle == (smb_iconv_t) -1) { - DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln)); - return "ASCII"; - } else { - DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln)); - smb_iconv_close(handle); - } - return ln; - } -#endif - return "ASCII"; -} - /* the old_ic is passed in here as the smb_iconv_handle structure is used as a global pointer in some places (eg. python modules). 
We @@ -261,14 +228,11 @@ static const char *map_locale(const char *charset) _PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, const char *dos_charset, const char *unix_charset, - const char *display_charset, bool native_iconv, struct smb_iconv_handle *old_ic) { struct smb_iconv_handle *ret; - display_charset = map_locale(display_charset); - if (old_ic != NULL) { ret = old_ic; close_iconv_handle(ret); @@ -290,9 +254,13 @@ _PUBLIC_ struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, talloc_set_destructor(ret, close_iconv_handle); + if (strcasecmp(dos_charset, "UTF8") == 0 || strcasecmp(dos_charset, "UTF-8") == 0) { + DEBUG(0,("ERROR: invalid DOS charset: 'dos charset' must not be UTF8, using (default value) CP850 instead\n")); + dos_charset = "CP850"; + } + ret->dos_charset = talloc_strdup(ret->child_ctx, dos_charset); ret->unix_charset = talloc_strdup(ret->child_ctx, unix_charset); - ret->display_charset = talloc_strdup(ret->child_ctx, display_charset); ret->native_iconv = native_iconv; return ret; diff --git a/lib/util/charset/convert_string.c b/lib/util/charset/convert_string.c index e51add2aaf..51f9fec137 100644 --- a/lib/util/charset/convert_string.c +++ b/lib/util/charset/convert_string.c @@ -2,7 +2,8 @@ Unix SMB/CIFS implementation. Character set conversion Extensions Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001 - Copyright (C) Andrew Tridgell 2001 + Copyright (C) Andrew Tridgell 2001-2011 + Copyright (C) Andrew Bartlett 2011 Copyright (C) Simo Sorce 2001 Copyright (C) Martin Pool 2003 @@ -21,6 +22,7 @@ */ #include "includes.h" +#include "system/iconv.h" /** * @file @@ -177,28 +179,29 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, size_t slen = srclen; size_t dlen = destlen; unsigned char lastp = '\0'; + bool ret; - /* If all characters are ascii, fast path here. 
*/ - while (((slen == (size_t)-1) || (slen >= 2)) && dlen) { - if (((lastp = *p) <= 0x7f) && (p[1] == 0)) { + if (slen == (size_t)-1) { + while (dlen && + ((lastp = *p) <= 0x7f) && (p[1] == 0)) { *q++ = *p; - if (slen != (size_t)-1) { - slen -= 2; - } p += 2; dlen--; retval++; if (!lastp) break; - } else { -#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS - goto general_case; -#else - bool ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size); - *converted_size += retval; - return ret; -#endif } + if (lastp != 0) goto slow_path; + } else { + while (slen >= 2 && dlen && + (*p <= 0x7f) && (p[1] == 0)) { + *q++ = *p; + slen -= 2; + p += 2; + dlen--; + retval++; + } + if (slen != 0) goto slow_path; } *converted_size = retval; @@ -212,6 +215,19 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, } } return true; + + slow_path: + /* come here when we hit a character we can't deal + * with in the fast path + */ +#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS + goto general_case; +#else + ret = convert_string_internal(ic, from, to, p, slen, q, dlen, converted_size); + *converted_size += retval; + return ret; +#endif + } else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) { const unsigned char *p = (const unsigned char *)src; unsigned char *q = (unsigned char *)dest; @@ -221,8 +237,8 @@ bool convert_string_error_handle(struct smb_iconv_handle *ic, unsigned char lastp = '\0'; /* If all characters are ascii, fast path here. */ - while (slen && (dlen >= 2)) { - if ((lastp = *p) <= 0x7F) { + while (slen && (dlen >= 1)) { + if (dlen >=2 && (lastp = *p) <= 0x7F) { *q++ = *p++; *q++ = '\0'; if (slen != (size_t)-1) { @@ -387,7 +403,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic, } /* +2 is for ucs2 null termination. 
*/ - ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2); + ob = talloc_realloc(ctx, ob, char, destlen + 2); if (!ob) { DEBUG(0, ("convert_string_talloc: realloc failed!\n")); @@ -428,7 +444,7 @@ bool convert_string_talloc_handle(TALLOC_CTX *ctx, struct smb_iconv_handle *ic, */ if (o_len > 1024) { /* We're shrinking here so we know the +2 is safe from wrap. */ - ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2); + ob = talloc_realloc(ctx,ob, char, destlen + 2); } if (destlen && !ob) { diff --git a/lib/util/charset/pull_push.c b/lib/util/charset/pull_push.c new file mode 100644 index 0000000000..b7a5bcdc65 --- /dev/null +++ b/lib/util/charset/pull_push.c @@ -0,0 +1,150 @@ +/* + Unix SMB/CIFS implementation. + Character set conversion Extensions + Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001 + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Simo Sorce 2001 + Copyright (C) Martin Pool 2003 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +*/ + +#include "includes.h" +#include "system/locale.h" + +/** + * Copy a string from a unix char* src to a UCS2 destination, + * allocating a buffer using talloc(). + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. 
+ **/ +bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len, + (void **)dest, converted_size); +} + +/** + * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, + (void**)dest, converted_size); +} + +/** + * Copy a string from a unix char* src to an ASCII destination, + * allocating a buffer using talloc(). + * + * @param dest always set at least to NULL + * + * @param converted_size The number of bytes occupied by the string in the destination + * @returns boolean indicating if the conversion was successful + **/ +bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len, + (void **)dest, converted_size); +} + +/** + * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. 
+ **/ + +bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, + size_t *converted_size) +{ + size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t); + + *dest = NULL; + return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} + + +/** + * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. + **/ + +bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} + + +/** + * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc + * + * @param dest always set at least to NULL + * @parm converted_size set to the number of bytes occupied by the string in + * the destination on success. + * + * @return true if new buffer was correctly allocated, and string was + * converted. 
+ **/ + +bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, + size_t *converted_size) +{ + size_t src_len = strlen(src)+1; + + *dest = NULL; + return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, + (void **)dest, converted_size); +} diff --git a/lib/util/charset/tests/convert_string.c b/lib/util/charset/tests/convert_string.c index 32fc11f527..9a5d974fe3 100644 --- a/lib/util/charset/tests/convert_string.c +++ b/lib/util/charset/tests/convert_string.c @@ -105,7 +105,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) talloc_steal(tctx, gd_iso8859_1.data); talloc_steal(tctx, gd_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -199,11 +199,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, gd_utf8.data, gd_utf8.length, (void *)&gd_output.data, &gd_output.length), - "conversion from UTF8 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -227,11 +227,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, 
convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, gd_utf16le.data, gd_utf16le.length, (void *)&gd_output.data, &gd_output.length), - "conversion from UTF16LE to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_DOS, @@ -248,11 +248,11 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, gd_output, gd_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DOS, CH_DISPLAY, + CH_DOS, CH_UTF8, gd_iso8859_1.data, gd_iso8859_1.length, (void *)&gd_output.data, &gd_output.length), - "conversion from (dos charset) ISO8859-1 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF16LE, @@ -265,7 +265,7 @@ static bool test_gd_iso8859_cp850_handle(struct torture_context *tctx) (const char *)gd_iso8859_1.data, CH_DOS, CH_UTF16LE), gd_output.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF8, @@ -282,6 +282,191 @@ static bool test_gd_iso8859_cp850_handle(struct 
torture_context *tctx) return true; } +static bool test_gd_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB gd_utf8 = base64_decode_data_blob(gd_utf8_base64); + DATA_BLOB gd_cp850 = base64_decode_data_blob(gd_cp850_base64); + DATA_BLOB gd_utf16le = base64_decode_data_blob(gd_utf16le_base64); + DATA_BLOB gd_output; + DATA_BLOB gd_utf8_terminated; + DATA_BLOB gd_cp850_terminated; + DATA_BLOB gd_utf16le_terminated; + + talloc_steal(tctx, gd_utf8.data); + talloc_steal(tctx, gd_cp850.data); + talloc_steal(tctx, gd_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + gd_utf8_terminated = data_blob_talloc(tctx, NULL, gd_utf8.length + 1); + memcpy(gd_utf8_terminated.data, gd_utf8.data, gd_utf8.length); + gd_utf8_terminated.data[gd_utf8.length] = '\0'; + + gd_cp850_terminated = data_blob_talloc(tctx, NULL, gd_cp850.length + 1); + memcpy(gd_cp850_terminated.data, gd_cp850.data, gd_cp850.length); + gd_cp850_terminated.data[gd_cp850.length] = '\0'; + + gd_utf16le_terminated = data_blob_talloc(tctx, NULL, gd_utf16le.length + 2); + memcpy(gd_utf16le_terminated.data, gd_utf16le.data, gd_utf16le.length); + gd_utf16le_terminated.data[gd_utf16le.length] = '\0'; + gd_utf16le_terminated.data[gd_utf16le.length + 1] = '\0'; + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + 
(void *)gd_output.data, gd_utf16le.length, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 1, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_utf16le.length - 2, &gd_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, 
E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 1, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_utf8.length - 2, &gd_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to CP850 (dos) null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to CP850 (dos) null terminated"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + gd_utf8_terminated.data[3] = '\0'; + gd_utf8_terminated.length = 4; /* used for the comparison only */ + + gd_cp850_terminated.data[2] = '\0'; + gd_cp850_terminated.length = 3; /* used for the comparison only */ + + gd_utf16le_terminated.data[4] = '\0'; + gd_utf16le_terminated.data[5] = '\0'; + gd_utf16le_terminated.length = 6; /* used for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, 
gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_DOS, CH_UTF16LE, + gd_cp850_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from CP850 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_cp850.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_DOS, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_cp850_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + /* Now null terminate the string particularly early, the confirm we don't skip the NULL and convert any further */ + gd_utf8_terminated.data[1] = '\0'; + gd_utf8_terminated.length = 2; /* used for the comparison only */ + + gd_utf16le_terminated.data[2] = '\0'; + 
gd_utf16le_terminated.data[3] = '\0'; + gd_utf16le_terminated.length = 4; /* used for the comparison only */ + + gd_output = data_blob_talloc(tctx, NULL, gd_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, CH_UTF16LE, + gd_utf8_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + gd_output = data_blob_talloc(tctx, NULL, gd_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + gd_utf16le_terminated.data, -1, + (void *)gd_output.data, gd_output.length, &gd_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + static bool test_gd_ascii_handle(struct torture_context *tctx) { struct smb_iconv_handle *iconv_handle; @@ -296,7 +481,7 @@ static bool test_gd_ascii_handle(struct torture_context *tctx) talloc_steal(tctx, gd_iso8859_1.data); talloc_steal(tctx, gd_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -365,7 +550,7 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx talloc_steal(tctx, plato_english_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); torture_assert(tctx, iconv_handle, "getting iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -383,11 +568,11 @@ 
static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF8 to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_english_utf8.data, plato_english_utf8.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from UTF8 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + "conversion from UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -436,11 +621,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_english_utf16le.data, plato_english_utf16le.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from UTF16LE to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from UTF16LE to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_DOS, @@ -457,11 +642,11 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx torture_assert_data_blob_equal(tctx, plato_english_output, 
plato_english_cp850, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DOS, CH_DISPLAY, + CH_DOS, CH_UTF8, plato_english_iso8859_1.data, plato_english_iso8859_1.length, (void *)&plato_english_output.data, &plato_english_output.length), - "conversion from (dos charset) ISO8859-1 to (display charset) UTF8"); - torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_DOS, CH_UTF16LE, @@ -472,6 +657,261 @@ static bool test_plato_english_iso8859_cp850_handle(struct torture_context *tctx return true; } +static bool test_plato_english_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_english_utf8 = data_blob_string_const(plato_english_ascii); + DATA_BLOB plato_english_utf16le = base64_decode_data_blob(plato_english_utf16le_base64); + DATA_BLOB plato_english_output; + DATA_BLOB plato_english_utf8_terminated; + DATA_BLOB plato_english_utf16le_terminated; + + talloc_steal(tctx, plato_english_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + plato_english_utf8_terminated = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 1); + memcpy(plato_english_utf8_terminated.data, plato_english_utf8.data, plato_english_utf8.length); + plato_english_utf8_terminated.data[plato_english_utf8.length] = '\0'; + + plato_english_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 2); + memcpy(plato_english_utf16le_terminated.data, plato_english_utf16le.data, 
plato_english_utf16le.length); + plato_english_utf16le_terminated.data[plato_english_utf16le.length] = '\0'; + plato_english_utf16le_terminated.data[plato_english_utf16le.length + 1] = '\0'; + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 1, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf16le.length - 2, &plato_english_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, 
E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 1, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_utf8.length - 2, &plato_english_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now null terminate the string early, the confirm we don't skip the 
NULL and convert any further */ + plato_english_utf8_terminated.data[3] = '\0'; + plato_english_utf8_terminated.length = 4; /* used for the comparison only */ + + plato_english_utf16le_terminated.data[6] = '\0'; + plato_english_utf16le_terminated.data[7] = '\0'; + plato_english_utf16le_terminated.length = 8; /* used for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + + /* Now null terminate the string particularly early, the confirm we don't skip the NULL and convert any further */ + plato_english_utf8_terminated.data[1] = '\0'; + plato_english_utf8_terminated.length = 2; /* used for the comparison only */ + + plato_english_utf16le_terminated.data[2] = '\0'; + plato_english_utf16le_terminated.data[3] = '\0'; + plato_english_utf16le_terminated.length = 4; /* used for the comparison only */ + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, CH_UTF8, 
CH_UTF16LE, + plato_english_utf8_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated very early"); + + plato_english_output = data_blob_talloc(tctx, NULL, plato_english_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_english_utf16le_terminated.data, -1, + (void *)plato_english_output.data, plato_english_output.length, &plato_english_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_english_output, plato_english_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated very early"); + + return true; +} + +static bool test_plato_minus_1_handle(struct torture_context *tctx) +{ + struct smb_iconv_handle *iconv_handle; + DATA_BLOB plato_utf8 = base64_decode_data_blob(plato_utf8_base64); + DATA_BLOB plato_utf16le = base64_decode_data_blob(plato_utf16le_base64); + DATA_BLOB plato_output; + DATA_BLOB plato_utf8_terminated; + DATA_BLOB plato_utf16le_terminated; + + talloc_steal(tctx, plato_utf8.data); + talloc_steal(tctx, plato_utf16le.data); + + iconv_handle = get_iconv_testing_handle(tctx, "ISO8859-1", "CP850"); + torture_assert(tctx, iconv_handle, "getting iconv handle"); + + plato_utf8_terminated = data_blob_talloc(tctx, NULL, plato_utf8.length + 1); + memcpy(plato_utf8_terminated.data, plato_utf8.data, plato_utf8.length); + plato_utf8_terminated.data[plato_utf8.length] = '\0'; + + plato_utf16le_terminated = data_blob_talloc(tctx, NULL, plato_utf16le.length + 2); + memcpy(plato_utf16le_terminated.data, plato_utf16le.data, plato_utf16le.length); + plato_utf16le_terminated.data[plato_utf16le.length] = '\0'; + plato_utf16le_terminated.data[plato_utf16le.length + 1] = '\0'; + 
+ plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 1, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, plato_utf16le.length - 2, &plato_output.length) == false, + "conversion from UTF8 to UTF16LE null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF8 to UTF16LE should fail E2BIG"); + + plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 
null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 null terminated"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 1, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_utf8.length - 2, &plato_output.length) == false, + "conversion from UTF16LE to UTF8 null terminated should fail"); + torture_assert_errno_equal(tctx, E2BIG, "conversion from UTF16LE to UTF8 should fail E2BIG"); + + /* Now null terminate the string early, the confirm we don't skip the NULL and convert any further */ + plato_utf8_terminated.data[5] = '\0'; + plato_utf8_terminated.length = 6; /* used for the comparison only */ + + plato_utf16le_terminated.data[4] = '\0'; + plato_utf16le_terminated.data[5] = '\0'; + plato_utf16le_terminated.length = 6; /* used for the comparison only */ + + plato_output = data_blob_talloc(tctx, NULL, plato_utf16le.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8_terminated.data, -1, + (void *)plato_output.data, 
plato_output.length, &plato_output.length), + "conversion from UTF8 to UTF16LE null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le_terminated, "conversion from UTF8 to UTF16LE null terminated early"); + + plato_output = data_blob_talloc(tctx, NULL, plato_utf8.length + 10); + + torture_assert(tctx, convert_string_error_handle(iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le_terminated.data, -1, + (void *)plato_output.data, plato_output.length, &plato_output.length), + "conversion from UTF16LE to UTF8 null terminated"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8_terminated, "conversion from UTF16LE to UTF8 null terminated early"); + + return true; +} + static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) { struct smb_iconv_handle *iconv_handle; @@ -483,7 +923,7 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) talloc_steal(tctx, plato_utf8.data); talloc_steal(tctx, plato_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8"); torture_assert(tctx, iconv_handle, "creating iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -568,11 +1008,11 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_utf8.data, plato_utf8.length, (void *)&plato_output.data, &plato_output.length), "conversion of UTF16 ancient greek to unix charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF8 to UTF8 incorrect"); 
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -627,39 +1067,39 @@ static bool test_plato_cp850_utf8_handle(struct torture_context *tctx) "conversion of UTF16 ancient greek to UTF8 failed"); torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_utf16le.data, plato_utf16le.length, (void *)&plato_output.data, &plato_output.length), - "conversion of UTF16 ancient greek to display charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF16LE, + CH_UTF8, CH_UTF16LE, plato_output.data, plato_output.length, (void *)&plato_output2.data, &plato_output2.length), - "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_output2, plato_utf16le, - "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 ancient greek to UTF8 and back again failed"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_output.data, - CH_DISPLAY, CH_UTF16LE), + CH_UTF8, CH_UTF16LE), plato_output2.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to UTF8 and back again"); torture_assert(tctx, 
convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF8, + CH_UTF8, CH_UTF8, plato_output.data, plato_output.length, (void *)&plato_output2.data, &plato_output2.length), - "conversion of display charset UTF8 to UTF8"); + "conversion of UTF8 to UTF8"); torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, - "conversion of display charset UTF8 to UTF8"); + "conversion of UTF8 to UTF8"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_output.data, - CH_DISPLAY, CH_UTF8), + CH_UTF8, CH_UTF8), plato_output2.length, - "checking strlen_m_ext of conversion of display charset UTF8 to UTF8"); + "checking strlen_m_ext of conversion of UTF8 to UTF8"); return true; } @@ -674,7 +1114,7 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) talloc_steal(tctx, plato_latin_utf8.data); talloc_steal(tctx, plato_latin_utf16le.data); - iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "CP850", "UTF8"); torture_assert(tctx, iconv_handle, "creating iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, @@ -691,11 +1131,11 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (unix charset) UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF8, CH_DISPLAY, + CH_UTF8, CH_UTF8, plato_latin_utf8.data, plato_latin_utf8.length, (void *)&plato_latin_output.data, &plato_latin_output.length), "conversion of UTF16 latin charset greek to unix charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to (display charset) UTF8 incorrect"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF8 to UTF8 incorrect"); torture_assert(tctx, 
convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DOS, @@ -711,25 +1151,25 @@ static bool test_plato_latin_cp850_utf8_handle(struct torture_context *tctx) torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (unix charset) CP850 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_UTF16LE, CH_DISPLAY, + CH_UTF16LE, CH_UTF8, plato_latin_utf16le.data, plato_latin_utf16le.length, (void *)&plato_latin_output.data, &plato_latin_output.length), - "conversion of UTF16 latin charset greek to display charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to (display charset) UTF8 incorrect"); + "conversion of UTF16 latin charset greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_latin_output, plato_latin_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, - CH_DISPLAY, CH_UTF16LE, + CH_UTF8, CH_UTF16LE, plato_latin_output.data, plato_latin_output.length, (void *)&plato_latin_output2.data, &plato_latin_output2.length), - "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_latin_output2, plato_latin_utf16le, - "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); + "round trip conversion of UTF16 latin charset greek to UTF8 and back again failed"); torture_assert_int_equal(tctx, strlen_m_ext_handle(iconv_handle, (const char *)plato_latin_output.data, - CH_DISPLAY, CH_UTF16LE), + CH_UTF8, CH_UTF16LE), plato_latin_output2.length / 2, - "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + "checking strlen_m_ext of round trip conversion of 
UTF16 latin charset greek to UTF8 and back again"); return true; } @@ -742,7 +1182,7 @@ static bool test_gd_case_utf8_handle(struct torture_context *tctx) char *gd_lower, *gd_upper; talloc_steal(tctx, gd_utf8.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); torture_assert(tctx, @@ -805,7 +1245,7 @@ static bool test_gd_case_cp850_handle(struct torture_context *tctx) char *gd_lower, *gd_upper; talloc_steal(tctx, gd_cp850.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850", "CP850"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "CP850"); torture_assert(tctx, iconv_handle, "getting cp850 iconv handle"); torture_assert(tctx, @@ -866,7 +1306,7 @@ static bool test_plato_case_utf8_handle(struct torture_context *tctx) char *plato_lower, *plato_upper; talloc_steal(tctx, plato_utf8.data); - iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8", "UTF8"); + iconv_handle = get_iconv_testing_handle(tctx, "ASCII", "UTF8"); torture_assert(tctx, iconv_handle, "getting utf8 iconv handle"); torture_assert(tctx, @@ -1248,9 +1688,12 @@ struct torture_suite *torture_local_convert_string_handle(TALLOC_CTX *mem_ctx) struct torture_suite *suite = torture_suite_create(mem_ctx, "convert_string_handle"); torture_suite_add_simple_test(suite, "gd_ascii", test_gd_ascii_handle); + torture_suite_add_simple_test(suite, "gd_minus_1", test_gd_minus_1_handle); torture_suite_add_simple_test(suite, "gd_iso8859_cp850", test_gd_iso8859_cp850_handle); torture_suite_add_simple_test(suite, "plato_english_iso8859_cp850", test_plato_english_iso8859_cp850_handle); + torture_suite_add_simple_test(suite, "plato_english_minus_1", test_plato_english_minus_1_handle); torture_suite_add_simple_test(suite, "plato_cp850_utf8", test_plato_cp850_utf8_handle); + torture_suite_add_simple_test(suite, "plato_minus_1", 
test_plato_minus_1_handle); torture_suite_add_simple_test(suite, "plato_latin_cp850_utf8", test_plato_latin_cp850_utf8_handle); return suite; } diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c index e8f0b788b1..688ab5a0a1 100644 --- a/lib/util/charset/util_str.c +++ b/lib/util/charset/util_str.c @@ -5,6 +5,8 @@ Copyright (C) Simo Sorce 2001 Copyright (C) Andrew Bartlett 2011 Copyright (C) Jeremy Allison 1992-2007 + Copyright (C) Martin Pool 2003 + Copyright (C) James Peach 2006 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -167,7 +169,6 @@ _PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic, switch (dst_charset) { case CH_DOS: case CH_UNIX: - case CH_DISPLAY: smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)"); default: break; @@ -327,7 +328,7 @@ _PUBLIC_ char *strchr_m(const char *src, char c) for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { if (*s == c) - return (char *)s; + return discard_const_p(char, s); } if (!*s) @@ -395,7 +396,7 @@ _PUBLIC_ char *strrchr_m(const char *s, char c) break; } /* No - we have a match ! */ - return (char *)cp; + return discard_const_p(char , cp); } } while (cp-- != s); if (!got_mb) @@ -473,3 +474,84 @@ _PUBLIC_ bool strhasupper(const char *string) struct smb_iconv_handle *ic = get_iconv_handle(); return strhasupper_handle(ic, string); } + +/*********************************************************************** + strstr_m - We convert via ucs2 for now. 
+***********************************************************************/ + +char *strstr_m(const char *src, const char *findstr) +{ + smb_ucs2_t *p; + smb_ucs2_t *src_w, *find_w; + const char *s; + char *s2; + char *retp; + size_t converted_size, findstr_len = 0; + + TALLOC_CTX *frame; /* Only set up in the iconv case */ + + /* for correctness */ + if (!findstr[0]) { + return discard_const_p(char, src); + } + + /* Samba does single character findstr calls a *lot*. */ + if (findstr[1] == '\0') + return strchr_m(src, *findstr); + + /* We optimise for the ascii case, knowing that all our + supported multi-byte character sets are ascii-compatible + (ie. they match for the first 128 chars) */ + + for (s = src; *s && !(((unsigned char)s[0]) & 0x80); s++) { + if (*s == *findstr) { + if (!findstr_len) + findstr_len = strlen(findstr); + + if (strncmp(s, findstr, findstr_len) == 0) { + return discard_const_p(char, s); + } + } + } + + if (!*s) + return NULL; + +#if 1 /* def BROKEN_UNICODE_COMPOSE_CHARACTERS */ + /* 'make check' fails unless we do this */ + + /* With compose characters we must restart from the beginning. JRA. 
*/ + s = src; +#endif + + frame = talloc_stackframe(); + + if (!push_ucs2_talloc(frame, &src_w, src, &converted_size)) { + DEBUG(0,("strstr_m: src malloc fail\n")); + TALLOC_FREE(frame); + return NULL; + } + + if (!push_ucs2_talloc(frame, &find_w, findstr, &converted_size)) { + DEBUG(0,("strstr_m: find malloc fail\n")); + TALLOC_FREE(frame); + return NULL; + } + + p = strstr_w(src_w, find_w); + + if (!p) { + TALLOC_FREE(frame); + return NULL; + } + + *p = 0; + if (!pull_ucs2_talloc(frame, &s2, src_w, &converted_size)) { + TALLOC_FREE(frame); + DEBUG(0,("strstr_m: dest malloc fail\n")); + return NULL; + } + retp = discard_const_p(char, (s+strlen(s2))); + TALLOC_FREE(frame); + return retp; +} diff --git a/lib/util/charset/util_unistr.c b/lib/util/charset/util_unistr.c index a1be501c7c..e4ae65053c 100644 --- a/lib/util/charset/util_unistr.c +++ b/lib/util/charset/util_unistr.c @@ -161,85 +161,6 @@ _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src) } /** - Convert a string to lower case. -**/ -_PUBLIC_ void strlower_m(char *s) -{ - char *d; - struct smb_iconv_handle *iconv_handle; - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars) */ - while (*s && !(((uint8_t)*s) & 0x80)) { - *s = tolower((uint8_t)*s); - s++; - } - - if (!*s) - return; - - iconv_handle = get_iconv_handle(); - - d = s; - - while (*s) { - size_t c_size, c_size2; - codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size); - c_size2 = push_codepoint_handle(iconv_handle, d, tolower_m(c)); - if (c_size2 > c_size) { - DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strlower_m\n", - c, tolower_m(c), (int)c_size, (int)c_size2)); - smb_panic("codepoint expansion in strlower_m\n"); - } - s += c_size; - d += c_size2; - } - *d = 0; -} - -/** - Convert a string to UPPER case. 
-**/ -_PUBLIC_ void strupper_m(char *s) -{ - char *d; - struct smb_iconv_handle *iconv_handle; - - /* this is quite a common operation, so we want it to be - fast. We optimise for the ascii case, knowing that all our - supported multi-byte character sets are ascii-compatible - (ie. they match for the first 128 chars) */ - while (*s && !(((uint8_t)*s) & 0x80)) { - *s = toupper((uint8_t)*s); - s++; - } - - if (!*s) - return; - - iconv_handle = get_iconv_handle(); - - d = s; - - while (*s) { - size_t c_size, c_size2; - codepoint_t c = next_codepoint_handle(iconv_handle, s, &c_size); - c_size2 = push_codepoint_handle(iconv_handle, d, toupper_m(c)); - if (c_size2 > c_size) { - DEBUG(0,("FATAL: codepoint 0x%x (0x%x) expanded from %d to %d bytes in strupper_m\n", - c, toupper_m(c), (int)c_size, (int)c_size2)); - smb_panic("codepoint expansion in strupper_m\n"); - } - s += c_size; - d += c_size2; - } - *d = 0; -} - - -/** Find the number of 'c' chars in a string **/ _PUBLIC_ size_t count_chars_m(const char *s, char c) @@ -273,7 +194,7 @@ _PUBLIC_ size_t count_chars_m(const char *s, char c) * @param dest_len the maximum length in bytes allowed in the * destination. If @p dest_len is -1 then no maximum is used. 
**/ -static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size) +static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size) { size_t src_len; bool ret; @@ -283,7 +204,7 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, if (tmpbuf == NULL) { return false; } - ret = push_ascii(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size); + ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size); talloc_free(tmpbuf); return ret; } @@ -297,23 +218,6 @@ static bool push_ascii(void *dest, const char *src, size_t dest_len, int flags, } /** - * Copy a string from a unix char* src to an ASCII destination, - * allocating a buffer using talloc(). - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - * or -1 in case of error. - **/ -_PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_DOS, src, src_len, (void **)dest, converted_size); -} - - -/** * Copy a string from a dos codepage source to a unix char* destination. * * The resulting string in "dest" is always null terminated. @@ -328,7 +232,7 @@ _PUBLIC_ bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, s * @param src_len is the length of the source area in bytes. * @returns the number of bytes occupied by the string in @p src. 
**/ -static ssize_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) +static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) { size_t size = 0; @@ -411,38 +315,6 @@ static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags /** - * Copy a string from a unix char* src to a UCS2 destination, - * allocating a buffer using talloc(). - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - * or -1 in case of error. - **/ -_PUBLIC_ bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UTF16, src, src_len, (void **)dest, converted_size); -} - - -/** - * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, converted_size); -} - -/** Copy a string from a ucs2 source to a unix char* destination. Flags can have: STR_TERMINATE means the string in src is null terminated. 
@@ -484,51 +356,6 @@ static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src } /** - * Copy a string from a ASCII src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** - * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size) -{ - size_t src_len = utf16_len(src); - *dest = NULL; - return convert_string_talloc(ctx, CH_UTF16, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** - * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc - * - * @param dest always set at least to NULL - * - * @returns The number of bytes occupied by the string in the destination - **/ - -_PUBLIC_ bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size) -{ - size_t src_len = strlen(src)+1; - *dest = NULL; - return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, converted_size); -} - -/** Copy a string from a char* src to a unicode or ascii dos codepage destination choosing unicode or ascii based on the flags in the SMB buffer starting at base_ptr. 
@@ -546,7 +373,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f { if (flags & STR_ASCII) { size_t size = 0; - if (push_ascii(dest, src, dest_len, flags, &size)) { + if (push_ascii_string(dest, src, dest_len, flags, &size)) { return (ssize_t)size; } else { return (ssize_t)-1; @@ -577,7 +404,7 @@ _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int f _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags) { if (flags & STR_ASCII) { - return pull_ascii(dest, src, dest_len, src_len, flags); + return pull_ascii_string(dest, src, dest_len, src_len, flags); } else if (flags & STR_UNICODE) { return pull_ucs2(dest, src, dest_len, src_len, flags); } else { @@ -585,68 +412,3 @@ _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_ return -1; } } - - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @param converted_size the number of bytes occupied in the destination - * - * @returns true on success, false on fail. 
- **/ -_PUBLIC_ bool convert_string(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - return convert_string_handle(get_iconv_handle(), from, to, - src, srclen, - dest, destlen, converted_size); -} - -/** - * Convert string from one encoding to another, making error checking etc - * - * @param src pointer to source string (multibyte or singlebyte) - * @param srclen length of the source string in bytes - * @param dest pointer to destination string (multibyte or singlebyte) - * @param destlen maximal length allowed for string - * @param converted_size the number of bytes occupied in the destination - * - * @returns true on success, false on fail. - **/ -_PUBLIC_ bool convert_string_error(charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t destlen, - size_t *converted_size) -{ - return convert_string_error_handle(get_iconv_handle(), from, to, - src, srclen, - dest, destlen, converted_size); -} - -/** - * Convert between character sets, allocating a new buffer using talloc for the result. - * - * @param srclen length of source buffer. - * @param dest always set at least to NULL - * @param converted_size Size in bytes of the converted string - * @note -1 is not accepted for srclen. - * - * @returns boolean indication whether the conversion succeeded - **/ - -_PUBLIC_ bool convert_string_talloc(TALLOC_CTX *ctx, - charset_t from, charset_t to, - void const *src, size_t srclen, - void *dest, size_t *converted_size) -{ - return convert_string_talloc_handle(ctx, get_iconv_handle(), - from, to, src, srclen, dest, - converted_size); -} - diff --git a/lib/util/charset/util_unistr_w.c b/lib/util/charset/util_unistr_w.c index a550e52776..3fbed7f67c 100644 --- a/lib/util/charset/util_unistr_w.c +++ b/lib/util/charset/util_unistr_w.c @@ -22,8 +22,8 @@ #include "includes.h" /* Copy into a smb_ucs2_t from a possibly unaligned buffer. 
Return the copied smb_ucs2_t */ -#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((unsigned char *)(src))[0],\ - ((unsigned char *)(dest))[1] = ((unsigned char *)(src))[1], (dest)) +#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((const unsigned char *)(src))[0],\ + ((unsigned char *)(dest))[1] = ((const unsigned char *)(src))[1], (dest)) /* return an ascii version of a ucs2 character */ @@ -72,12 +72,12 @@ smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c) smb_ucs2_t cp; while (*(COPY_UCS2_CHAR(&cp,s))) { if (c == cp) { - return (smb_ucs2_t *)s; + return discard_const_p(smb_ucs2_t, s); } s++; } if (c == cp) { - return (smb_ucs2_t *)s; + return discard_const_p(smb_ucs2_t, s); } return NULL; @@ -104,7 +104,7 @@ smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c) p += (len - 1); do { if (c == *(COPY_UCS2_CHAR(&cp,p))) { - return (smb_ucs2_t *)p; + return discard_const_p(smb_ucs2_t, p); } } while (p-- != s); return NULL; @@ -234,38 +234,6 @@ static int strncmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) return (len - n)?(*(COPY_UCS2_CHAR(&cpa,a)) - *(COPY_UCS2_CHAR(&cpb,b))):0; } -/******************************************************************* - Case insensitive string comparison. -********************************************************************/ - -int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b) -{ - smb_ucs2_t cpa, cpb; - - while ((*COPY_UCS2_CHAR(&cpb,b)) && toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb)) { - a++; - b++; - } - return (tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))); -} - -/******************************************************************* - Case insensitive string comparison, length limited. 
-********************************************************************/ - -int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len) -{ - smb_ucs2_t cpa, cpb; - size_t n = 0; - - while ((n < len) && *COPY_UCS2_CHAR(&cpb,b) && (toupper_m(*(COPY_UCS2_CHAR(&cpa,a))) == toupper_m(cpb))) { - a++; - b++; - n++; - } - return (len - n)?(tolower_m(*(COPY_UCS2_CHAR(&cpa,a))) - tolower_m(*(COPY_UCS2_CHAR(&cpb,b)))):0; -} - /* The *_wa() functions take a combination of 7 bit ascii and wide characters They are used so that you can use string diff --git a/lib/util/charset/weird.c b/lib/util/charset/weird.c new file mode 100644 index 0000000000..5db8cdcecd --- /dev/null +++ b/lib/util/charset/weird.c @@ -0,0 +1,134 @@ +/* + Unix SMB/CIFS implementation. + Samba module with developer tools + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Jelmer Vernooij 2002 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" + +static struct { + char from; + const char *to; + int len; +} weird_table[] = { + {'q', "^q^", 3}, + {'Q', "^Q^", 3}, + {0, NULL} +}; + +static size_t weird_pull(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + while (*inbytesleft >= 1 && *outbytesleft >= 2) { + int i; + int done = 0; + for (i=0;weird_table[i].from;i++) { + if (strncmp((*inbuf), + weird_table[i].to, + weird_table[i].len) == 0) { + if (*inbytesleft < weird_table[i].len) { + DEBUG(0,("ERROR: truncated weird string\n")); + /* smb_panic("weird_pull"); */ + + } else { + (*outbuf)[0] = weird_table[i].from; + (*outbuf)[1] = 0; + (*inbytesleft) -= weird_table[i].len; + (*outbytesleft) -= 2; + (*inbuf) += weird_table[i].len; + (*outbuf) += 2; + done = 1; + break; + } + } + } + if (done) continue; + (*outbuf)[0] = (*inbuf)[0]; + (*outbuf)[1] = 0; + (*inbytesleft) -= 1; + (*outbytesleft) -= 2; + (*inbuf) += 1; + (*outbuf) += 2; + } + + if (*inbytesleft > 0) { + errno = E2BIG; + return -1; + } + + return 0; +} + +static size_t weird_push(void *cd, const char **inbuf, size_t *inbytesleft, + char **outbuf, size_t *outbytesleft) +{ + int ir_count=0; + + while (*inbytesleft >= 2 && *outbytesleft >= 1) { + int i; + int done=0; + for (i=0;weird_table[i].from;i++) { + if ((*inbuf)[0] == weird_table[i].from && + (*inbuf)[1] == 0) { + if (*outbytesleft < weird_table[i].len) { + DEBUG(0,("No room for weird character\n")); + /* smb_panic("weird_push"); */ + } else { + memcpy(*outbuf, weird_table[i].to, + weird_table[i].len); + (*inbytesleft) -= 2; + (*outbytesleft) -= weird_table[i].len; + (*inbuf) += 2; + (*outbuf) += weird_table[i].len; + done = 1; + break; + } + } + } + if (done) continue; + + (*outbuf)[0] = (*inbuf)[0]; + if ((*inbuf)[1]) ir_count++; + (*inbytesleft) -= 2; + (*outbytesleft) -= 1; + (*inbuf) += 2; + (*outbuf) += 1; + } + + if (*inbytesleft == 1) { + errno = EINVAL; + return -1; + } + + if (*inbytesleft > 1) { + errno = E2BIG; 
+ return -1; + } + + return ir_count; +} + +struct charset_functions weird_functions = {"WEIRD", weird_pull, weird_push}; + +NTSTATUS charset_weird_init(void); +NTSTATUS charset_weird_init(void) +{ + if (!smb_register_charset(&weird_functions)) { + return NT_STATUS_INTERNAL_ERROR; + } + return NT_STATUS_OK; +} diff --git a/lib/util/charset/wscript_build b/lib/util/charset/wscript_build index 29e168dce1..1f2c8dfa7a 100644 --- a/lib/util/charset/wscript_build +++ b/lib/util/charset/wscript_build @@ -1,18 +1,44 @@ #!/usr/bin/env python - -if bld.env._SAMBA_BUILD_ == 4: - bld.SAMBA_SUBSYSTEM('CHARSET', - source='charcnv.c util_unistr.c', - public_deps='CODEPOINTS', - public_headers='charset.h', - ) - bld.SAMBA_SUBSYSTEM('ICONV_WRAPPER', source='iconv.c', public_deps='iconv replace talloc') -bld.SAMBA_SUBSYSTEM('CODEPOINTS', - source='codepoints.c util_str.c util_unistr_w.c', - deps='DYNCONFIG ICONV_WRAPPER' - ) +bld.SAMBA_SUBSYSTEM('CHARSET', + public_headers='charset.h', + source='codepoints.c convert_string.c util_str.c util_unistr_w.c charcnv.c pull_push.c util_unistr.c', + deps='DYNCONFIG ICONV_WRAPPER', + public_deps='talloc') + +bld.SAMBA_MODULE('charset_weird', + subsystem='CHARSET', + source='weird.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_weird'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_weird')) + +bld.SAMBA_MODULE('charset_CP850', + subsystem='CHARSET', + source='CP850.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP850'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP850')) + +bld.SAMBA_MODULE('charset_CP437', + subsystem='CHARSET', + source='CP437.c', + init_function='', + deps='samba-util', + internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_CP437'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_CP437')) + +bld.SAMBA_MODULE('charset_macosxfs', + subsystem='CHARSET', + source='charset_macosxfs.c', + init_function='', + 
internal_module=bld.SAMBA3_IS_STATIC_MODULE('charset_macosxfs'), + enabled=bld.SAMBA3_IS_ENABLED_MODULE('charset_macosxfs')) + + diff --git a/lib/util/data_blob.h b/lib/util/data_blob.h index 83e6cd5f09..558ade9248 100644 --- a/lib/util/data_blob.h +++ b/lib/util/data_blob.h @@ -1,7 +1,10 @@ /* Unix SMB/CIFS implementation. DATA BLOB - + + Copyright (C) Andrew Tridgell 2001 + Copyright (C) Andrew Bartlett 2001 + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or diff --git a/lib/util/debug.c b/lib/util/debug.c index b0a78823fc..c1b33de6d1 100644 --- a/lib/util/debug.c +++ b/lib/util/debug.c @@ -203,7 +203,7 @@ void gfree_debugsyms(void) TALLOC_FREE(format_bufr); - debug_num_classes = DBGC_MAX_FIXED; + debug_num_classes = 0; state.initialized = false; } diff --git a/lib/util/debug_s3.h b/lib/util/debug_s3.h index 96b8ed74d9..9e5211b19b 100644 --- a/lib/util/debug_s3.h +++ b/lib/util/debug_s3.h @@ -17,6 +17,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ +#include "librpc/gen_ndr/server_id.h" + struct messaging_context; struct server_id; void debug_message(struct messaging_context *msg_ctx, void *private_data, uint32_t msg_type, struct server_id src, DATA_BLOB *data); diff --git a/lib/util/dprintf.c b/lib/util/dprintf.c index e9a15dcbe6..90ca36c1ae 100644 --- a/lib/util/dprintf.c +++ b/lib/util/dprintf.c @@ -33,58 +33,10 @@ #include "includes.h" #include "system/locale.h" -#include "param/param.h" -static smb_iconv_t display_cd = (smb_iconv_t)-1; - -void d_set_iconv(smb_iconv_t cd) +static int d_vfprintf(FILE *f, const char *format, va_list ap) { - if (display_cd != (smb_iconv_t)-1) - talloc_free(display_cd); - - display_cd = cd; -} - -_PUBLIC_ int d_vfprintf(FILE *f, const char *format, va_list ap) -{ - char *p, *p2; - int ret, clen; - va_list ap2; - - /* If there's nothing to convert, take a shortcut */ - if (display_cd == (smb_iconv_t)-1) { - return vfprintf(f, format, ap); - } - - /* do any message translations */ - va_copy(ap2, ap); - ret = vasprintf(&p, format, ap2); - va_end(ap2); - - if (ret <= 0) return ret; - - clen = iconv_talloc(NULL, display_cd, p, ret, (void **)&p2); - if (clen == -1) { - /* the string can't be converted - do the best we can, - filling in non-printing chars with '?' */ - int i; - for (i=0;i<ret;i++) { - if (isprint(p[i]) || isspace(p[i])) { - fwrite(p+i, 1, 1, f); - } else { - fwrite("?", 1, 1, f); - } - } - SAFE_FREE(p); - return ret; - } - - /* good, its converted OK */ - SAFE_FREE(p); - ret = fwrite(p2, 1, clen, f); - talloc_free(p2); - - return ret; + return vfprintf(f, format, ap); } @@ -100,15 +52,25 @@ _PUBLIC_ int d_fprintf(FILE *f, const char *format, ...) return ret; } -_PUBLIC_ int d_printf(const char *format, ...) +static FILE *outfile; + +_PUBLIC_ int d_printf(const char *format, ...) 
{ int ret; - va_list ap; - - va_start(ap, format); - ret = d_vfprintf(stdout, format, ap); - va_end(ap); - - return ret; + va_list ap; + + if (!outfile) outfile = stdout; + + va_start(ap, format); + ret = d_vfprintf(outfile, format, ap); + va_end(ap); + + return ret; } +/* interactive programs need a way of tell d_*() to write to stderr instead + of stdout */ +void display_set_stderr(void) +{ + outfile = stderr; +} diff --git a/lib/util/fault.c b/lib/util/fault.c index 086dc33545..708dc670d1 100644 --- a/lib/util/fault.c +++ b/lib/util/fault.c @@ -119,7 +119,7 @@ static void smb_panic_default(const char *why) if (panic_action && *panic_action) { char pidstr[20]; char cmdstring[200]; - safe_strcpy(cmdstring, panic_action, sizeof(cmdstring)-1); + strlcpy(cmdstring, panic_action, sizeof(cmdstring)); snprintf(pidstr, sizeof(pidstr), "%d", (int) getpid()); all_string_sub(cmdstring, "%PID%", pidstr, sizeof(cmdstring)); DEBUG(0, ("smb_panic(): calling panic action [%s]\n", cmdstring)); diff --git a/lib/util/ms_fnmatch.c b/lib/util/ms_fnmatch.c index 73fb0e0966..1ba5888ca0 100644 --- a/lib/util/ms_fnmatch.c +++ b/lib/util/ms_fnmatch.c @@ -154,7 +154,7 @@ static int ms_fnmatch_core(const char *p, const char *n, return -1; } -int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol) +int ms_fnmatch_protocol(const char *pattern, const char *string, int protocol) { int ret, count, i; struct max_n *max_n = NULL; @@ -192,7 +192,7 @@ int ms_fnmatch(const char *pattern, const char *string, enum protocol_types prot p[i] = '<'; } } - ret = ms_fnmatch(p, string, PROTOCOL_NT1); + ret = ms_fnmatch_protocol(p, string, PROTOCOL_NT1); talloc_free(p); return ret; } @@ -217,5 +217,5 @@ int ms_fnmatch(const char *pattern, const char *string, enum protocol_types prot /** a generic fnmatch function - uses for non-CIFS pattern matching */ int gen_fnmatch(const char *pattern, const char *string) { - return ms_fnmatch(pattern, string, PROTOCOL_NT1); + return 
ms_fnmatch_protocol(pattern, string, PROTOCOL_NT1); } diff --git a/lib/util/parmlist.c b/lib/util/parmlist.c index 6658fa7e33..0f2f3af8ee 100644 --- a/lib/util/parmlist.c +++ b/lib/util/parmlist.c @@ -20,6 +20,8 @@ #include "../lib/util/dlinklist.h" #include "../lib/util/parmlist.h" +#undef strcasecmp + struct parmlist_entry *parmlist_get(struct parmlist *ctx, const char *name) { struct parmlist_entry *e; diff --git a/lib/util/server_id.c b/lib/util/server_id.c new file mode 100644 index 0000000000..195deeac7c --- /dev/null +++ b/lib/util/server_id.c @@ -0,0 +1,41 @@ +/* + Unix SMB/CIFS implementation. + Samba utility functions + Copyright (C) Andrew Bartlett 2011 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "includes.h" +#include "librpc/gen_ndr/server_id.h" + +char *server_id_str(TALLOC_CTX *mem_ctx, const struct server_id *id) +{ + if (id->vnn == NONCLUSTER_VNN && id->task_id == 0) { + return talloc_asprintf(mem_ctx, + "%llu", + (unsigned long long)id->pid); + } else if (id->vnn == NONCLUSTER_VNN) { + return talloc_asprintf(mem_ctx, + "%llu.%u", + (unsigned long long)id->pid, + (unsigned)id->task_id); + } else { + return talloc_asprintf(mem_ctx, + "%u:%llu.%u", + (unsigned)id->vnn, + (unsigned long long)id->pid, + (unsigned)id->task_id); + } +} diff --git a/lib/util/string_wrappers.h b/lib/util/string_wrappers.h index 75718e942b..37384fc5a3 100644 --- a/lib/util/string_wrappers.h +++ b/lib/util/string_wrappers.h @@ -41,28 +41,36 @@ size_t __unsafe_string_function_usage_here_size_t__(void); #endif /* HAVE_COMPILER_WILL_OPTIMIZE_OUT_FNS */ -#define safe_strcpy_base(dest, src, base, size) \ - safe_strcpy(dest, src, size-PTR_DIFF(dest,base)-1) +#define strlcpy_base(dest, src, base, size) \ +do { \ + const char *_strlcpy_base_src = (const char *)src; \ + strlcpy((dest), _strlcpy_base_src? _strlcpy_base_src : "", (size)-PTR_DIFF((dest),(base))); \ +} while (0) /* String copy functions - macro hell below adds 'type checking' (limited, but the best we can do in C) */ -#define fstrcpy(d,s) safe_strcpy((d),(s),sizeof(fstring)-1) -#define fstrcat(d,s) safe_strcat((d),(s),sizeof(fstring)-1) -#define nstrcpy(d,s) safe_strcpy((d), (s),sizeof(nstring)-1) -#define unstrcpy(d,s) safe_strcpy((d), (s),sizeof(unstring)-1) - -/* the addition of the DEVELOPER checks in safe_strcpy means we must - * update a lot of code. To make this a little easier here are some - * functions that provide the lengths with less pain */ - -/* overmalloc_safe_strcpy: DEPRECATED! Used when you know the - * destination buffer is longer than maxlength, but you don't know how - * long. This is not a good situation, because we can't do the normal - * sanity checks. Don't use in new code! 
*/ - -#define overmalloc_safe_strcpy(dest,src,maxlength) \ - safe_strcpy_fn(dest,src,maxlength) +#define fstrcpy(d,s) \ +do { \ + const char *_fstrcpy_src = (const char *)(s); \ + strlcpy((d),_fstrcpy_src ? _fstrcpy_src : "",sizeof(fstring)); \ +} while (0) + +#define fstrcat(d,s) \ +do { \ + const char *_fstrcat_src = (const char *)(s); \ + strlcat((d),_fstrcat_src ? _fstrcat_src : "",sizeof(fstring)); \ +} while (0) +#define nstrcpy(d,s) \ +do { \ + const char *_nstrcpy_src = (const char *)(s); \ + strlcpy((d),_nstrcpy_src ? _nstrcpy_src : "",sizeof(fstring)); \ +} while (0) +#define unstrcpy(d,s) \ +do { \ + const char *_unstrcpy_src = (const char *)(s); \ + strlcpy((d),_unstrcpy_src ? _unstrcpy_src : "",sizeof(fstring)); \ +} while (0) #ifdef HAVE_COMPILER_WILL_OPTIMIZE_OUT_FNS @@ -70,16 +78,6 @@ size_t __unsafe_string_function_usage_here_size_t__(void); have the correct types (this works only where sizeof() returns the size of the buffer, not the size of the pointer). */ -#define safe_strcpy(d, s, max_len) \ - (CHECK_STRING_SIZE(d, max_len+1) \ - ? __unsafe_string_function_usage_here__() \ - : safe_strcpy_fn((d), (s), (max_len))) - -#define safe_strcat(d, s, max_len) \ - (CHECK_STRING_SIZE(d, max_len+1) \ - ? __unsafe_string_function_usage_here__() \ - : safe_strcat_fn((d), (s), (max_len))) - #define push_string_check(dest, src, dest_len, flags) \ (CHECK_STRING_SIZE(dest, dest_len) \ ? __unsafe_string_function_usage_here_size_t__() \ @@ -113,8 +111,6 @@ size_t __unsafe_string_function_usage_here_size_t__(void); #else -#define safe_strcpy safe_strcpy_fn -#define safe_strcat safe_strcat_fn #define push_string_check push_string_check_fn #define clistr_push clistr_push_fn #define clistr_pull clistr_pull_fn diff --git a/lib/util/substitute.c b/lib/util/substitute.c index 32945a7213..500d12777f 100644 --- a/lib/util/substitute.c +++ b/lib/util/substitute.c @@ -29,18 +29,20 @@ **/ /** - Substitute a string for a pattern in another string. 
Make sure there is + Substitute a string for a pattern in another string. Make sure there is enough room! - This routine looks for pattern in s and replaces it with - insert. It may do multiple replacements. + This routine looks for pattern in s and replaces it with + insert. It may do multiple replacements or just one. Any of " ; ' $ or ` in the insert string are replaced with _ if len==0 then the string cannot be extended. This is different from the old use of len==0 which was for no length checks to be done. **/ -_PUBLIC_ void string_sub(char *s, const char *pattern, const char *insert, size_t len) +static void string_sub2(char *s,const char *pattern, const char *insert, size_t len, + bool remove_unsafe_characters, bool replace_once, + bool allow_trailing_dollar) { char *p; ssize_t ls, lp, li, i; @@ -55,9 +57,10 @@ _PUBLIC_ void string_sub(char *s, const char *pattern, const char *insert, size_ if (len == 0) len = ls + 1; /* len is number of *bytes* */ - while (lp <= ls && (p = strstr(s, pattern))) { + while (lp <= ls && (p = strstr_m(s,pattern))) { if (ls + (li-lp) >= len) { - DEBUG(0,("ERROR: string overflow by %d in string_sub(%.50s, %d)\n", + DEBUG(0,("ERROR: string overflow by " + "%d in string_sub(%.50s, %d)\n", (int)(ls + (li-lp) - len), pattern, (int)len)); break; @@ -67,25 +70,50 @@ _PUBLIC_ void string_sub(char *s, const char *pattern, const char *insert, size_ } for (i=0;i<li;i++) { switch (insert[i]) { + case '$': + /* allow a trailing $ + * (as in machine accounts) */ + if (allow_trailing_dollar && (i == li - 1 )) { + p[i] = insert[i]; + break; + } case '`': case '"': case '\'': case ';': - case '$': case '%': case '\r': case '\n': - p[i] = '_'; - break; + if ( remove_unsafe_characters ) { + p[i] = '_'; + /* yes this break should be here + * since we want to fall throw if + * not replacing unsafe chars */ + break; + } default: p[i] = insert[i]; } } s = p + li; ls += (li-lp); + + if (replace_once) + break; } } +void string_sub_once(char *s, const 
char *pattern, + const char *insert, size_t len) +{ + string_sub2( s, pattern, insert, len, true, true, false ); +} + +void string_sub(char *s,const char *pattern, const char *insert, size_t len) +{ + string_sub2( s, pattern, insert, len, true, false, false ); +} + /** * Talloc'ed version of string_sub */ @@ -146,13 +174,14 @@ _PUBLIC_ void all_string_sub(char *s,const char *pattern,const char *insert, siz if (!*pattern) return; - + if (len == 0) len = ls + 1; /* len is number of *bytes* */ - - while (lp <= ls && (p = strstr(s,pattern))) { + + while (lp <= ls && (p = strstr_m(s,pattern))) { if (ls + (li-lp) >= len) { - DEBUG(0,("ERROR: string overflow by %d in all_string_sub(%.50s, %d)\n", + DEBUG(0,("ERROR: string overflow by " + "%d in all_string_sub(%.50s, %d)\n", (int)(ls + (li-lp) - len), pattern, (int)len)); break; diff --git a/lib/util/system.c b/lib/util/system.c index 9bf5de1a83..1e80f1a88a 100644 --- a/lib/util/system.c +++ b/lib/util/system.c @@ -22,6 +22,8 @@ #include "system/network.h" #include "system/filesys.h" +#undef malloc + /* The idea is that this file will eventually have wrappers around all important system calls in samba. The aims are: @@ -37,6 +39,42 @@ expansions/etc make sense to the OS should be acceptable to Samba. */ +/******************************************************************* + A wrapper for memalign +********************************************************************/ + +void *sys_memalign( size_t align, size_t size ) +{ +#if defined(HAVE_POSIX_MEMALIGN) + void *p = NULL; + int ret = posix_memalign( &p, align, size ); + if ( ret == 0 ) + return p; + + return NULL; +#elif defined(HAVE_MEMALIGN) + return memalign( align, size ); +#else + /* On *BSD systems memaligns doesn't exist, but memory will + * be aligned on allocations of > pagesize. 
*/ +#if defined(SYSCONF_SC_PAGESIZE) + size_t pagesize = (size_t)sysconf(_SC_PAGESIZE); +#elif defined(HAVE_GETPAGESIZE) + size_t pagesize = (size_t)getpagesize(); +#else + size_t pagesize = (size_t)-1; +#endif + if (pagesize == (size_t)-1) { + DEBUG(0,("memalign functionalaity not available on this platform!\n")); + return NULL; + } + if (size < pagesize) { + size = pagesize; + } + return malloc(size); +#endif +} + /************************************************************************** A wrapper for gethostbyname() that tries avoids looking up hostnames in the root domain, which can cause dial-on-demand links to come up for no @@ -117,3 +155,76 @@ _PUBLIC_ pid_t sys_getpid(void) return mypid; } + + +_PUBLIC_ int sys_getpeereid( int s, uid_t *uid) +{ +#if defined(HAVE_PEERCRED) + struct ucred cred; + socklen_t cred_len = sizeof(struct ucred); + int ret; + + ret = getsockopt(s, SOL_SOCKET, SO_PEERCRED, (void *)&cred, &cred_len); + if (ret != 0) { + return -1; + } + + if (cred_len != sizeof(struct ucred)) { + errno = EINVAL; + return -1; + } + + *uid = cred.uid; + return 0; +#else +#if defined(HAVE_GETPEEREID) + gid_t gid; + return getpeereid(s, uid, &gid); +#endif + errno = ENOSYS; + return -1; +#endif +} + +_PUBLIC_ int sys_getnameinfo(const struct sockaddr *psa, + int salen, + char *host, + size_t hostlen, + char *service, + size_t servlen, + int flags) +{ + /* + * For Solaris we must make sure salen is the + * correct length for the incoming sa_family. 
+ */ + + if (salen == sizeof(struct sockaddr_storage)) { + salen = sizeof(struct sockaddr_in); +#if defined(HAVE_IPV6) + if (psa->sa_family == AF_INET6) { + salen = sizeof(struct sockaddr_in6); + } +#endif + } + return getnameinfo(psa, salen, host, hostlen, service, servlen, flags); +} + +_PUBLIC_ int sys_connect(int fd, const struct sockaddr * addr) +{ + socklen_t salen = (socklen_t)-1; + + if (addr->sa_family == AF_INET) { + salen = sizeof(struct sockaddr_in); + } else if (addr->sa_family == AF_UNIX) { + salen = sizeof(struct sockaddr_un); + } +#if defined(HAVE_IPV6) + else if (addr->sa_family == AF_INET6) { + salen = sizeof(struct sockaddr_in6); + } +#endif + + return connect(fd, addr, salen); +} + diff --git a/lib/util/talloc_stack.c b/lib/util/talloc_stack.c index 8e559cc20f..16e9d745d3 100644 --- a/lib/util/talloc_stack.c +++ b/lib/util/talloc_stack.c @@ -188,3 +188,20 @@ TALLOC_CTX *talloc_tos(void) return ts->talloc_stack[ts->talloc_stacksize-1]; } + +/* + * return true if a talloc stackframe exists + * this can be used to prevent memory leaks for code that can + * optionally use a talloc stackframe (eg. nt_errstr()) + */ + +bool talloc_stackframe_exists(void) +{ + struct talloc_stackframe *ts = + (struct talloc_stackframe *)SMB_THREAD_GET_TLS(global_ts); + + if (ts == NULL || ts->talloc_stacksize == 0) { + return false; + } + return true; +} diff --git a/lib/util/talloc_stack.h b/lib/util/talloc_stack.h index 0e8fab3759..ec0c1c6f37 100644 --- a/lib/util/talloc_stack.h +++ b/lib/util/talloc_stack.h @@ -53,4 +53,12 @@ TALLOC_CTX *talloc_stackframe_pool(size_t poolsize); TALLOC_CTX *talloc_tos(void); +/* + * return true if a talloc stackframe exists + * this can be used to prevent memory leaks for code that can + * optionally use a talloc stackframe (eg. 
nt_errstr()) + */ + +bool talloc_stackframe_exists(void); + #endif diff --git a/lib/util/tdb_wrap.c b/lib/util/tdb_wrap.c new file mode 100644 index 0000000000..71aea5e36c --- /dev/null +++ b/lib/util/tdb_wrap.c @@ -0,0 +1,215 @@ +/* + Unix SMB/CIFS implementation. + TDB wrap functions + + Copyright (C) Andrew Tridgell 2004 + Copyright (C) Jelmer Vernooij <jelmer@samba.org> 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "lib/util/dlinklist.h" +#include "lib/util/tdb_wrap.h" + +/* FIXME: TDB2 does this internally, so no need to wrap multiple opens! */ +#if BUILD_TDB2 +static void tdb_wrap_log(struct tdb_context *tdb, + enum tdb_log_level level, + const char *message, + void *unused) +{ + int dl; + const char *name = tdb_name(tdb); + + switch (level) { + case TDB_LOG_USE_ERROR: + case TDB_LOG_ERROR: + dl = 0; + break; + case TDB_LOG_WARNING: + dl = 2; + break; + default: + dl = 0; + } + + DEBUG(dl, ("tdb(%s): %s", name ? name : "unnamed", message)); +} +#else +/* + Log tdb messages via DEBUG(). +*/ +static void tdb_wrap_log(TDB_CONTEXT *tdb, enum tdb_debug_level level, + const char *format, ...) PRINTF_ATTRIBUTE(3,4); + +static void tdb_wrap_log(TDB_CONTEXT *tdb, enum tdb_debug_level level, + const char *format, ...) 
+{ + va_list ap; + char *ptr = NULL; + int debuglevel = 0; + int ret; + + switch (level) { + case TDB_DEBUG_FATAL: + debuglevel = 0; + break; + case TDB_DEBUG_ERROR: + debuglevel = 1; + break; + case TDB_DEBUG_WARNING: + debuglevel = 2; + break; + case TDB_DEBUG_TRACE: + debuglevel = 5; + break; + default: + debuglevel = 0; + } + + va_start(ap, format); + ret = vasprintf(&ptr, format, ap); + va_end(ap); + + if (ret != -1) { + const char *name = tdb_name(tdb); + DEBUG(debuglevel, ("tdb(%s): %s", name ? name : "unnamed", ptr)); + free(ptr); + } +} +#endif + +struct tdb_wrap_private { + struct tdb_context *tdb; + const char *name; + struct tdb_wrap_private *next, *prev; +}; + +static struct tdb_wrap_private *tdb_list; + +/* destroy the last connection to a tdb */ +static int tdb_wrap_private_destructor(struct tdb_wrap_private *w) +{ + tdb_close(w->tdb); + DLIST_REMOVE(tdb_list, w); + return 0; +} + +static struct tdb_wrap_private *tdb_wrap_private_open(TALLOC_CTX *mem_ctx, + const char *name, + int hash_size, + int tdb_flags, + int open_flags, + mode_t mode) +{ + struct tdb_wrap_private *result; + + result = talloc(mem_ctx, struct tdb_wrap_private); + if (result == NULL) { + return NULL; + } + result->name = talloc_strdup(result, name); + if (result->name == NULL) { + goto fail; + } + +#if _SAMBA_BUILD_ == 3 + /* This #if _SAMBA_BUILD == 3 is very unfortunate, as it means + * that in the top level build, these options are not + * available for these databases. 
However, having two + * different tdb_wrap lists is a worse fate, so this will do + * for now */ + + if (!lp_use_mmap()) { + tdb_flags |= TDB_NOMMAP; + } + + if ((hash_size == 0) && (name != NULL)) { + const char *base; + base = strrchr_m(name, '/'); + + if (base != NULL) { + base += 1; + } else { + base = name; + } + hash_size = lp_parm_int(-1, "tdb_hashsize", base, 0); + } +#endif + + result->tdb = tdb_open_compat(name, hash_size, tdb_flags, + open_flags, mode, tdb_wrap_log, NULL); + if (result->tdb == NULL) { + goto fail; + } + talloc_set_destructor(result, tdb_wrap_private_destructor); + DLIST_ADD(tdb_list, result); + return result; + +fail: + TALLOC_FREE(result); + return NULL; +} + +/* + wrapped connection to a tdb database + to close just talloc_free() the tdb_wrap pointer + */ +struct tdb_wrap *tdb_wrap_open(TALLOC_CTX *mem_ctx, + const char *name, int hash_size, int tdb_flags, + int open_flags, mode_t mode) +{ + struct tdb_wrap *result; + struct tdb_wrap_private *w; + + result = talloc(mem_ctx, struct tdb_wrap); + if (result == NULL) { + return NULL; + } + + for (w=tdb_list;w;w=w->next) { + if (strcmp(name, w->name) == 0) { + break; + } + } + + if (w == NULL) { + w = tdb_wrap_private_open(result, name, hash_size, tdb_flags, + open_flags, mode); + } else { + /* + * Correctly use talloc_reference: The tdb will be + * closed when "w" is being freed. The caller never + * sees "w", so an incorrect use of talloc_free(w) + * instead of calling talloc_unlink is not possible. + * To avoid having to refcount ourselves, "w" will + * have multiple parents that hang off all the + * tdb_wrap's being returned from here. Those parents + * can be freed without problem. 
+ */ + if (talloc_reference(result, w) == NULL) { + goto fail; + } + } + if (w == NULL) { + goto fail; + } + result->tdb = w->tdb; + return result; +fail: + TALLOC_FREE(result); + return NULL; +} + diff --git a/lib/util/tdb_wrap.h b/lib/util/tdb_wrap.h new file mode 100644 index 0000000000..6f9f3834d4 --- /dev/null +++ b/lib/util/tdb_wrap.h @@ -0,0 +1,42 @@ +/* + Unix SMB/CIFS implementation. + + database wrap headers + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +/* IMPORTANT: tdb_wrap should be always preferred over tdb_context for end consumer functions + it's because if the code will be running inside smbd, then we must use the linked list + of open tdb files, to determine if the tdb we desire is already open + as otherwise, when you close the tdb (even on a different file descriptor), + ALL LOCKS are lost (due to a real screwup in the POSIX specification that nobody has been able to get fixed) +*/ + +#ifndef _TDB_WRAP_H_ +#define _TDB_WRAP_H_ + +#include "tdb_compat.h" + +struct tdb_wrap { + struct tdb_context *tdb; +}; + +struct tdb_wrap *tdb_wrap_open(TALLOC_CTX *mem_ctx, + const char *name, int hash_size, int tdb_flags, + int open_flags, mode_t mode); + +#endif /* _TDB_WRAP_H_ */ diff --git a/lib/util/tests/asn1_tests.c b/lib/util/tests/asn1_tests.c index ac8ca538f8..3ee64c3f7a 100644 --- a/lib/util/tests/asn1_tests.c +++ b/lib/util/tests/asn1_tests.c @@ -4,6 +4,8 @@ util_asn1 testing Copyright (C) Kamen Mazdrashki <kamen.mazdrashki@postpath.com> 2009 + Copyright (C) Volker Lendecke 2004 + Copyright (C) Andrew Bartlett 2011 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -103,6 +105,55 @@ static const struct oid_data partial_oid_data_ok[] = { }, }; +static const struct { + DATA_BLOB blob; + int value; +} integer_tests[] = { + { + .blob = {"\x02\x01\x00", 3}, + .value = 0 + }, + { + .blob = {"\x02\x01\x7f", 3}, + .value = 127 + }, + { + .blob = {"\x02\x02\x00\x80", 4}, + .value = 128 + }, + { + .blob = {"\x02\x02\x01\x00", 4}, + .value = 256 + }, + { + .blob = {"\x02\x01\x80", 3}, + .value = -128 + }, + { + .blob = {"\x02\x02\xff\x7f", 4}, + .value = -129 + }, + { + .blob = {"\x02\x01\xff", 3}, + .value = -1 + }, + { + .blob = {"\x02\x02\xff\x01", 4}, + .value = -255 + }, + { + .blob = {"\x02\x02\x00\xff", 4}, + .value = 255 + }, + { + .blob = {"\x02\x04\x80\x00\x00\x00", 6}, + .value = 0x80000000 + }, + { + .blob = 
{"\x02\x04\x7f\xff\xff\xff", 6}, + .value = 0x7fffffff + } +}; /* Testing ber_write_OID_String() function */ static bool test_ber_write_OID_String(struct torture_context *tctx) @@ -260,6 +311,46 @@ static bool test_ber_read_partial_OID_String(struct torture_context *tctx) return true; } +/* + * Testing asn1_read_Integer and asn1_write_Integer functions, + * inspired by Love Hornquist Astrand + */ + +static bool test_asn1_Integer(struct torture_context *tctx) +{ + int i; + TALLOC_CTX *mem_ctx; + + mem_ctx = talloc_new(tctx); + + for (i = 0; i < ARRAY_SIZE(integer_tests); i++) { + ASN1_DATA *data; + DATA_BLOB blob; + int val; + + data = asn1_init(mem_ctx); + if (!data) { + return -1; + } + + asn1_write_Integer(data, integer_tests[i].value); + + blob.data = data->data; + blob.length = data->length; + torture_assert_data_blob_equal(tctx, blob, integer_tests[i].blob, "asn1_write_Integer gave incorrect result"); + + asn1_load(data, blob); + torture_assert(tctx, asn1_read_Integer(data, &val), "asn1_write_Integer output could not be read by asn1_read_Integer()"); + + torture_assert_int_equal(tctx, val, integer_tests[i].value, + "readback of asn1_write_Integer output by asn1_read_Integer() failed"); + } + + talloc_free(mem_ctx); + + return true; +} + /* LOCAL-ASN1 test suite creation */ struct torture_suite *torture_local_util_asn1(TALLOC_CTX *mem_ctx) @@ -278,5 +369,8 @@ struct torture_suite *torture_local_util_asn1(TALLOC_CTX *mem_ctx) torture_suite_add_simple_test(suite, "ber_read_partial_OID_String", test_ber_read_partial_OID_String); + torture_suite_add_simple_test(suite, "asn1_Integer", + test_asn1_Integer); + return suite; } diff --git a/lib/util/tests/str.c b/lib/util/tests/str.c index 6b38feaf43..f9f3abf731 100644 --- a/lib/util/tests/str.c +++ b/lib/util/tests/str.c @@ -25,7 +25,7 @@ static bool test_string_sub_simple(struct torture_context *tctx) { char tmp[100]; - safe_strcpy(tmp, "foobar", sizeof(tmp)); + strlcpy(tmp, "foobar", sizeof(tmp)); string_sub(tmp, 
"foo", "bar", sizeof(tmp)); torture_assert_str_equal(tctx, tmp, "barbar", "invalid sub"); return true; @@ -34,7 +34,7 @@ static bool test_string_sub_simple(struct torture_context *tctx) static bool test_string_sub_multiple(struct torture_context *tctx) { char tmp[100]; - safe_strcpy(tmp, "fooblafoo", sizeof(tmp)); + strlcpy(tmp, "fooblafoo", sizeof(tmp)); string_sub(tmp, "foo", "bar", sizeof(tmp)); torture_assert_str_equal(tctx, tmp, "barblabar", "invalid sub"); return true; @@ -43,7 +43,7 @@ static bool test_string_sub_multiple(struct torture_context *tctx) static bool test_string_sub_longer(struct torture_context *tctx) { char tmp[100]; - safe_strcpy(tmp, "foobla", sizeof(tmp)); + strlcpy(tmp, "foobla", sizeof(tmp)); string_sub(tmp, "foo", "blie", sizeof(tmp)); torture_assert_str_equal(tctx, tmp, "bliebla", "invalid sub"); return true; @@ -52,7 +52,7 @@ static bool test_string_sub_longer(struct torture_context *tctx) static bool test_string_sub_shorter(struct torture_context *tctx) { char tmp[100]; - safe_strcpy(tmp, "foobla", sizeof(tmp)); + strlcpy(tmp, "foobla", sizeof(tmp)); string_sub(tmp, "foo", "bl", sizeof(tmp)); torture_assert_str_equal(tctx, tmp, "blbla", "invalid sub"); return true; @@ -61,7 +61,7 @@ static bool test_string_sub_shorter(struct torture_context *tctx) static bool test_string_sub_special_char(struct torture_context *tctx) { char tmp[100]; - safe_strcpy(tmp, "foobla", sizeof(tmp)); + strlcpy(tmp, "foobla", sizeof(tmp)); string_sub(tmp, "foo", "%b;l", sizeof(tmp)); torture_assert_str_equal(tctx, tmp, "_b_lbla", "invalid sub"); return true; diff --git a/lib/util/tests/time.c b/lib/util/tests/time.c index 592f88f88b..a8b26762e3 100644 --- a/lib/util/tests/time.c +++ b/lib/util/tests/time.c @@ -81,29 +81,11 @@ static bool test_timestring(struct torture_context *tctx) return true; } -static bool test_get_time_zone(struct torture_context *tctx) -{ - time_t t = time(NULL); - int old_extra_time_offset = extra_time_offset; - int old_offset, 
new_offset; - /* test that extra_time_offset works */ - - old_offset = get_time_zone(t); - extra_time_offset = 42; - new_offset = get_time_zone(t); - extra_time_offset = old_extra_time_offset; - torture_assert_int_equal(tctx, old_offset+60*42, new_offset, - "time offset not used"); - return true; -} - - struct torture_suite *torture_local_util_time(TALLOC_CTX *mem_ctx) { struct torture_suite *suite = torture_suite_create(mem_ctx, "time"); torture_suite_add_simple_test(suite, "null_time", test_null_time); - torture_suite_add_simple_test(suite, "get_time_zone", test_get_time_zone); torture_suite_add_simple_test(suite, "null_nttime", test_null_nttime); torture_suite_add_simple_test(suite, "http_timestring", test_http_timestring); diff --git a/lib/util/time.c b/lib/util/time.c index 4843fc9697..31aa05cd0f 100644 --- a/lib/util/time.c +++ b/lib/util/time.c @@ -580,6 +580,24 @@ _PUBLIC_ struct timeval timeval_current_ofs(uint32_t secs, uint32_t usecs) } /** + return a timeval milliseconds into the future +*/ +_PUBLIC_ struct timeval timeval_current_ofs_msec(uint32_t msecs) +{ + struct timeval tv = timeval_current(); + return timeval_add(&tv, msecs / 1000, (msecs % 1000) * 1000); +} + +/** + return a timeval microseconds into the future +*/ +_PUBLIC_ struct timeval timeval_current_ofs_usec(uint32_t usecs) +{ + struct timeval tv = timeval_current(); + return timeval_add(&tv, usecs / 1000000, usecs % 1000000); +} + +/** compare two timeval structures. 
Return -1 if tv1 < tv2 Return 0 if tv1 == tv2 @@ -720,8 +738,6 @@ static int tm_diff(struct tm *a, struct tm *b) } -int extra_time_offset=0; - /** return the UTC offset in seconds west of UTC, or 0 if it cannot be determined */ @@ -735,7 +751,7 @@ _PUBLIC_ int get_time_zone(time_t t) tm = localtime(&t); if (!tm) return 0; - return tm_diff(&tm_utc,tm)+60*extra_time_offset; + return tm_diff(&tm_utc,tm); } struct timespec nt_time_to_unix_timespec(NTTIME *nt) diff --git a/lib/util/time.h b/lib/util/time.h index 3a406340f4..204c261c1d 100644 --- a/lib/util/time.h +++ b/lib/util/time.h @@ -1,7 +1,12 @@ /* Unix SMB/CIFS implementation. time utility functions - + + Copyright (C) Andrew Tridgell 1992-2004 + Copyright (C) Stefan (metze) Metzmacher 2002 + Copyright (C) Jeremy Allison 2007 + Copyright (C) Andrew Bartlett 2011 + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or @@ -213,6 +218,16 @@ struct timeval timeval_sum(const struct timeval *tv1, _PUBLIC_ struct timeval timeval_current_ofs(uint32_t secs, uint32_t usecs); /** + return a timeval milliseconds into the future +*/ +_PUBLIC_ struct timeval timeval_current_ofs_msec(uint32_t msecs); + +/** + return a timeval microseconds into the future +*/ +_PUBLIC_ struct timeval timeval_current_ofs_usec(uint32_t usecs); + +/** compare two timeval structures. Return -1 if tv1 < tv2 Return 0 if tv1 == tv2 @@ -285,7 +300,4 @@ struct timespec convert_time_t_to_timespec(time_t t); bool null_timespec(struct timespec ts); -/** Extra minutes to add to the normal GMT to local time conversion. 
*/ -extern int extra_time_offset; - #endif /* _SAMBA_TIME_H_ */ diff --git a/lib/util/util.c b/lib/util/util.c index d4a936fae9..7f30d436e8 100644 --- a/lib/util/util.c +++ b/lib/util/util.c @@ -152,7 +152,8 @@ _PUBLIC_ bool directory_create_or_exist(const char *dname, uid_t uid, } if ((st.st_mode & 0777) != dir_perms) { DEBUG(0, ("invalid permissions on directory " - "%s\n", dname)); + "'%s': has 0%o should be 0%o\n", dname, + (st.st_mode & 0777), dir_perms)); umask(old_umask); return false; } diff --git a/lib/util/util.h b/lib/util/util.h index 45779912f3..c715440186 100644 --- a/lib/util/util.h +++ b/lib/util/util.h @@ -62,6 +62,8 @@ extern const char *panic_action; #include "lib/util/memory.h" +#include "lib/util/string_wrappers.h" + /** * Write backtrace to debug log */ @@ -113,6 +115,8 @@ void CatchChildLeaveStatus(void); /* The following definitions come from lib/util/system.c */ +void *sys_memalign( size_t align, size_t size ); + /************************************************************************** A wrapper for gethostbyname() that tries avoids looking up hostnames in the root domain, which can cause dial-on-demand links to come up for no @@ -131,8 +135,20 @@ _PUBLIC_ pid_t sys_fork(void); **/ _PUBLIC_ pid_t sys_getpid(void); -/* The following definitions come from lib/util/genrand.c */ +_PUBLIC_ int sys_getpeereid( int s, uid_t *uid); + +struct sockaddr; + +_PUBLIC_ int sys_getnameinfo(const struct sockaddr *psa, + int salen, + char *host, + size_t hostlen, + char *service, + size_t servlen, + int flags); +_PUBLIC_ int sys_connect(int fd, const struct sockaddr * addr); +/* The following definitions come from lib/util/genrand.c */ /** Copy any user given reseed data. 
**/ @@ -195,14 +211,10 @@ _PUBLIC_ char** generate_unique_strs(TALLOC_CTX *mem_ctx, size_t len, uint32_t num); /* The following definitions come from lib/util/dprintf.c */ -#if _SAMBA_BUILD_ == 4 -_PUBLIC_ void d_set_iconv(smb_iconv_t); -_PUBLIC_ int d_vfprintf(FILE *f, const char *format, va_list ap) PRINTF_ATTRIBUTE(2,0); _PUBLIC_ int d_fprintf(FILE *f, const char *format, ...) PRINTF_ATTRIBUTE(2,3); _PUBLIC_ int d_printf(const char *format, ...) PRINTF_ATTRIBUTE(1,2); _PUBLIC_ void display_set_stderr(void); -#endif /* The following definitions come from lib/util/util_str.c */ @@ -233,18 +245,6 @@ _PUBLIC_ bool trim_string(char *s, const char *front, const char *back); _PUBLIC_ _PURE_ size_t count_chars(const char *s, char c); /** - Safe string copy into a known length string. maxlength does not - include the terminating zero. -**/ -_PUBLIC_ char *safe_strcpy(char *dest,const char *src, size_t maxlength); - -/** - Safe string cat into a string. maxlength does not - include the terminating zero. -**/ -_PUBLIC_ char *safe_strcat(char *dest, const char *src, size_t maxlength); - -/** Routine to get hex characters and turn them into a 16 byte array. the array can be variable length, and any non-hex-numeric characters are skipped. "0xnn" or "0Xnn" is specially catered @@ -284,6 +284,8 @@ _PUBLIC_ char *hex_encode_talloc(TALLOC_CTX *mem_ctx, const unsigned char *buff_ **/ _PUBLIC_ void string_sub(char *s,const char *pattern, const char *insert, size_t len); +_PUBLIC_ void string_sub_once(char *s, const char *pattern, + const char *insert, size_t len); _PUBLIC_ char *string_sub_talloc(TALLOC_CTX *mem_ctx, const char *s, const char *pattern, const char *insert); @@ -369,12 +371,10 @@ _PUBLIC_ bool set_boolean(const char *boolean_string, bool *boolean); */ _PUBLIC_ bool conv_str_bool(const char * str, bool * val); -#if _SAMBA_BUILD_ == 4 /** * Convert a size specification like 16K into an integral number of bytes. 
**/ -_PUBLIC_ bool conv_str_size(const char * str, uint64_t * val); -#endif +_PUBLIC_ bool conv_str_size_error(const char * str, uint64_t * val); /** * Parse a uint64_t value from a string @@ -775,11 +775,12 @@ enum protocol_types { PROTOCOL_SMB2 }; -int ms_fnmatch(const char *pattern, const char *string, enum protocol_types protocol); +#endif + +int ms_fnmatch_protocol(const char *pattern, const char *string, int protocol); /** a generic fnmatch function - uses for non-CIFS pattern matching */ int gen_fnmatch(const char *pattern, const char *string); -#endif /* The following definitions come from lib/util/idtree.c */ @@ -886,4 +887,32 @@ int samba_runcmd_recv(struct tevent_req *req, int *perrno); void samba_start_debugger(void); #endif +/** + * @brief Returns an absolute path to a file in the Samba modules directory. + * + * @param name File to find, relative to MODULESDIR. + * + * @retval Pointer to a string containing the full path. + **/ +char *modules_path(TALLOC_CTX *mem_ctx, const char *name); + +/** + * @brief Returns an absolute path to a file in the Samba data directory. + * + * @param name File to find, relative to CODEPAGEDIR. + * + * @retval Pointer to a talloc'ed string containing the full path. + **/ +char *data_path(TALLOC_CTX *mem_ctx, const char *name); + +/** + * @brief Returns the platform specific shared library extension. + * + * @retval Pointer to a const char * containing the extension. + **/ +const char *shlib_ext(void); + +struct server_id; +char *server_id_str(TALLOC_CTX *mem_ctx, const struct server_id *id); + #endif /* _SAMBA_UTIL_H_ */ diff --git a/lib/util/util_ldb.h b/lib/util/util_ldb.h index d2bc3b0ff7..66916443c3 100644 --- a/lib/util/util_ldb.h +++ b/lib/util/util_ldb.h @@ -1,3 +1,26 @@ +/* + Unix SMB/CIFS implementation. 
+ + common share info functions + + Copyright (C) Andrew Tridgell 2004 + Copyright (C) Tim Potter 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + + #ifndef __LIB_UTIL_UTIL_LDB_H__ #define __LIB_UTIL_UTIL_LDB_H__ diff --git a/lib/util/util_net.c b/lib/util/util_net.c index 9c8f5c6d47..64aa674d8b 100644 --- a/lib/util/util_net.c +++ b/lib/util/util_net.c @@ -54,6 +54,15 @@ bool interpret_string_addr_internal(struct addrinfo **ppres, /* By default make sure it supports TCP. */ hints.ai_socktype = SOCK_STREAM; + + /* always try as a numeric host first. This prevents unnecessary name + * lookups, and also ensures we accept IPv6 addresses */ + hints.ai_flags = AI_PASSIVE | AI_NUMERICHOST; + ret = getaddrinfo(str, NULL, &hints, ppres); + if (ret == 0) { + return true; + } + hints.ai_flags = flags; /* Linux man page on getaddrinfo() says port will be @@ -297,10 +306,10 @@ bool is_ipaddress_v4(const char *str) } /** - * Return true if a string could be an IPv4 or IPv6 address. + * Return true if a string could be a IPv6 address. */ -bool is_ipaddress(const char *str) +bool is_ipaddress_v6(const char *str) { #if defined(HAVE_IPV6) int ret = -1; @@ -328,7 +337,16 @@ bool is_ipaddress(const char *str) } } #endif - return is_ipaddress_v4(str); + return false; +} + +/** + * Return true if a string could be an IPv4 or IPv6 address. 
+ */ + +bool is_ipaddress(const char *str) +{ + return is_ipaddress_v4(str) || is_ipaddress_v6(str); } /** @@ -405,7 +423,7 @@ bool is_zero_addr(const struct sockaddr_storage *pss) */ void zero_ip_v4(struct in_addr *ip) { - memset(ip, '\0', sizeof(struct in_addr)); + ZERO_STRUCTP(ip); } /** @@ -415,7 +433,7 @@ void in_addr_to_sockaddr_storage(struct sockaddr_storage *ss, struct in_addr ip) { struct sockaddr_in *sa = (struct sockaddr_in *)ss; - memset(ss, '\0', sizeof(*ss)); + ZERO_STRUCTP(ss); sa->sin_family = AF_INET; sa->sin_addr = ip; } @@ -540,3 +558,319 @@ void set_sockaddr_port(struct sockaddr *psa, uint16_t port) } +/**************************************************************************** + Get a port number in host byte order from a sockaddr_storage. +****************************************************************************/ + +uint16_t get_sockaddr_port(const struct sockaddr_storage *pss) +{ + uint16_t port = 0; + + if (pss->ss_family != AF_INET) { +#if defined(HAVE_IPV6) + /* IPv6 */ + const struct sockaddr_in6 *sa6 = + (const struct sockaddr_in6 *)pss; + port = ntohs(sa6->sin6_port); +#endif + } else { + const struct sockaddr_in *sa = + (const struct sockaddr_in *)pss; + port = ntohs(sa->sin_port); + } + return port; +} + +/**************************************************************************** + Print out an IPv4 or IPv6 address from a struct sockaddr_storage. +****************************************************************************/ + +char *print_sockaddr_len(char *dest, + size_t destlen, + const struct sockaddr *psa, + socklen_t psalen) +{ + if (destlen > 0) { + dest[0] = '\0'; + } + (void)sys_getnameinfo(psa, + psalen, + dest, destlen, + NULL, 0, + NI_NUMERICHOST); + return dest; +} + +/**************************************************************************** + Print out an IPv4 or IPv6 address from a struct sockaddr_storage. 
+****************************************************************************/ + +char *print_sockaddr(char *dest, + size_t destlen, + const struct sockaddr_storage *psa) +{ + return print_sockaddr_len(dest, destlen, (const struct sockaddr *)psa, + sizeof(struct sockaddr_storage)); +} + +/**************************************************************************** + Print out a canonical IPv4 or IPv6 address from a struct sockaddr_storage. +****************************************************************************/ + +char *print_canonical_sockaddr(TALLOC_CTX *ctx, + const struct sockaddr_storage *pss) +{ + char addr[INET6_ADDRSTRLEN]; + char *dest = NULL; + int ret; + + /* Linux getnameinfo() man pages says port is unitialized if + service name is NULL. */ + + ret = sys_getnameinfo((const struct sockaddr *)pss, + sizeof(struct sockaddr_storage), + addr, sizeof(addr), + NULL, 0, + NI_NUMERICHOST); + if (ret != 0) { + return NULL; + } + + if (pss->ss_family != AF_INET) { +#if defined(HAVE_IPV6) + dest = talloc_asprintf(ctx, "[%s]", addr); +#else + return NULL; +#endif + } else { + dest = talloc_asprintf(ctx, "%s", addr); + } + + return dest; +} + +/**************************************************************************** + Return the port number we've bound to on a socket. +****************************************************************************/ + +int get_socket_port(int fd) +{ + struct sockaddr_storage sa; + socklen_t length = sizeof(sa); + + if (fd == -1) { + return -1; + } + + if (getsockname(fd, (struct sockaddr *)&sa, &length) < 0) { + int level = (errno == ENOTCONN) ? 2 : 0; + DEBUG(level, ("getsockname failed. 
Error was %s\n", + strerror(errno))); + return -1; + } + +#if defined(HAVE_IPV6) + if (sa.ss_family == AF_INET6) { + return ntohs(((struct sockaddr_in6 *)&sa)->sin6_port); + } +#endif + if (sa.ss_family == AF_INET) { + return ntohs(((struct sockaddr_in *)&sa)->sin_port); + } + return -1; +} + +/**************************************************************************** + Return the string of an IP address (IPv4 or IPv6). +****************************************************************************/ + +static const char *get_socket_addr(int fd, char *addr_buf, size_t addr_len) +{ + struct sockaddr_storage sa; + socklen_t length = sizeof(sa); + + /* Ok, returning a hard coded IPv4 address + * is bogus, but it's just as bogus as a + * zero IPv6 address. No good choice here. + */ + + strlcpy(addr_buf, "0.0.0.0", addr_len); + + if (fd == -1) { + return addr_buf; + } + + if (getsockname(fd, (struct sockaddr *)&sa, &length) < 0) { + DEBUG(0,("getsockname failed. Error was %s\n", + strerror(errno) )); + return addr_buf; + } + + return print_sockaddr_len(addr_buf, addr_len, (struct sockaddr *)&sa, length); +} + +const char *client_socket_addr(int fd, char *addr, size_t addr_len) +{ + return get_socket_addr(fd, addr, addr_len); +} + + +enum SOCK_OPT_TYPES {OPT_BOOL,OPT_INT,OPT_ON}; + +typedef struct smb_socket_option { + const char *name; + int level; + int option; + int value; + int opttype; +} smb_socket_option; + +static const smb_socket_option socket_options[] = { + {"SO_KEEPALIVE", SOL_SOCKET, SO_KEEPALIVE, 0, OPT_BOOL}, + {"SO_REUSEADDR", SOL_SOCKET, SO_REUSEADDR, 0, OPT_BOOL}, + {"SO_BROADCAST", SOL_SOCKET, SO_BROADCAST, 0, OPT_BOOL}, +#ifdef TCP_NODELAY + {"TCP_NODELAY", IPPROTO_TCP, TCP_NODELAY, 0, OPT_BOOL}, +#endif +#ifdef TCP_KEEPCNT + {"TCP_KEEPCNT", IPPROTO_TCP, TCP_KEEPCNT, 0, OPT_INT}, +#endif +#ifdef TCP_KEEPIDLE + {"TCP_KEEPIDLE", IPPROTO_TCP, TCP_KEEPIDLE, 0, OPT_INT}, +#endif +#ifdef TCP_KEEPINTVL + {"TCP_KEEPINTVL", IPPROTO_TCP, TCP_KEEPINTVL, 0, 
OPT_INT}, +#endif +#ifdef IPTOS_LOWDELAY + {"IPTOS_LOWDELAY", IPPROTO_IP, IP_TOS, IPTOS_LOWDELAY, OPT_ON}, +#endif +#ifdef IPTOS_THROUGHPUT + {"IPTOS_THROUGHPUT", IPPROTO_IP, IP_TOS, IPTOS_THROUGHPUT, OPT_ON}, +#endif +#ifdef SO_REUSEPORT + {"SO_REUSEPORT", SOL_SOCKET, SO_REUSEPORT, 0, OPT_BOOL}, +#endif +#ifdef SO_SNDBUF + {"SO_SNDBUF", SOL_SOCKET, SO_SNDBUF, 0, OPT_INT}, +#endif +#ifdef SO_RCVBUF + {"SO_RCVBUF", SOL_SOCKET, SO_RCVBUF, 0, OPT_INT}, +#endif +#ifdef SO_SNDLOWAT + {"SO_SNDLOWAT", SOL_SOCKET, SO_SNDLOWAT, 0, OPT_INT}, +#endif +#ifdef SO_RCVLOWAT + {"SO_RCVLOWAT", SOL_SOCKET, SO_RCVLOWAT, 0, OPT_INT}, +#endif +#ifdef SO_SNDTIMEO + {"SO_SNDTIMEO", SOL_SOCKET, SO_SNDTIMEO, 0, OPT_INT}, +#endif +#ifdef SO_RCVTIMEO + {"SO_RCVTIMEO", SOL_SOCKET, SO_RCVTIMEO, 0, OPT_INT}, +#endif +#ifdef TCP_FASTACK + {"TCP_FASTACK", IPPROTO_TCP, TCP_FASTACK, 0, OPT_INT}, +#endif +#ifdef TCP_QUICKACK + {"TCP_QUICKACK", IPPROTO_TCP, TCP_QUICKACK, 0, OPT_BOOL}, +#endif +#ifdef TCP_KEEPALIVE_THRESHOLD + {"TCP_KEEPALIVE_THRESHOLD", IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, 0, OPT_INT}, +#endif +#ifdef TCP_KEEPALIVE_ABORT_THRESHOLD + {"TCP_KEEPALIVE_ABORT_THRESHOLD", IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, 0, OPT_INT}, +#endif + {NULL,0,0,0,0}}; + +/**************************************************************************** + Print socket options. 
+****************************************************************************/ + +static void print_socket_options(int s) +{ + int value; + socklen_t vlen = 4; + const smb_socket_option *p = &socket_options[0]; + + /* wrapped in if statement to prevent streams + * leak in SCO Openserver 5.0 */ + /* reported on samba-technical --jerry */ + if ( DEBUGLEVEL >= 5 ) { + DEBUG(5,("Socket options:\n")); + for (; p->name != NULL; p++) { + if (getsockopt(s, p->level, p->option, + (void *)&value, &vlen) == -1) { + DEBUGADD(5,("\tCould not test socket option %s.\n", + p->name)); + } else { + DEBUGADD(5,("\t%s = %d\n", + p->name,value)); + } + } + } + } + +/**************************************************************************** + Set user socket options. +****************************************************************************/ + +void set_socket_options(int fd, const char *options) +{ + TALLOC_CTX *ctx = talloc_new(NULL); + char *tok; + + while (next_token_talloc(ctx, &options, &tok," \t,")) { + int ret=0,i; + int value = 1; + char *p; + bool got_value = false; + + if ((p = strchr_m(tok,'='))) { + *p = 0; + value = atoi(p+1); + got_value = true; + } + + for (i=0;socket_options[i].name;i++) + if (strequal(socket_options[i].name,tok)) + break; + + if (!socket_options[i].name) { + DEBUG(0,("Unknown socket option %s\n",tok)); + continue; + } + + switch (socket_options[i].opttype) { + case OPT_BOOL: + case OPT_INT: + ret = setsockopt(fd,socket_options[i].level, + socket_options[i].option, + (char *)&value,sizeof(int)); + break; + + case OPT_ON: + if (got_value) + DEBUG(0,("syntax error - %s " + "does not take a value\n",tok)); + + { + int on = socket_options[i].value; + ret = setsockopt(fd,socket_options[i].level, + socket_options[i].option, + (char *)&on,sizeof(int)); + } + break; + } + + if (ret != 0) { + /* be aware that some systems like Solaris return + * EINVAL to a setsockopt() call when the client + * sent a RST previously - no need to worry */ + DEBUG(2,("Failed 
to set socket option %s (Error %s)\n", + tok, strerror(errno) )); + } + } + + TALLOC_FREE(ctx); + print_socket_options(fd); +} diff --git a/lib/util/util_net.h b/lib/util/util_net.h index 530311e5c8..fc2776a32b 100644 --- a/lib/util/util_net.h +++ b/lib/util/util_net.h @@ -50,6 +50,15 @@ void set_sockaddr_port(struct sockaddr *psa, uint16_t port); **/ _PUBLIC_ bool is_zero_ip_v4(struct in_addr ip); +void in_addr_to_sockaddr_storage(struct sockaddr_storage *ss, + struct in_addr ip); +#if defined(HAVE_IPV6) +/** + * Convert an IPv6 struct in_addr to a struct sockaddr_storage. + */ +void in6_addr_to_sockaddr_storage(struct sockaddr_storage *ss, + struct in6_addr ip); +#endif /** Are two IPs on the same subnet? **/ @@ -60,6 +69,11 @@ _PUBLIC_ bool same_net_v4(struct in_addr ip1,struct in_addr ip2,struct in_addr m **/ _PUBLIC_ bool is_ipaddress(const char *str); +bool is_broadcast_addr(const struct sockaddr *pss); +bool is_loopback_ip_v4(struct in_addr ip); +bool is_loopback_addr(const struct sockaddr *pss); +bool is_zero_addr(const struct sockaddr_storage *pss); +void zero_ip_v4(struct in_addr *ip); /** Interpret an internet address or name into an IP address in 4 byte form. 
**/ @@ -71,6 +85,30 @@ _PUBLIC_ uint32_t interpret_addr(const char *str); _PUBLIC_ struct in_addr interpret_addr2(const char *str); _PUBLIC_ bool is_ipaddress_v4(const char *str); - +_PUBLIC_ bool is_ipaddress_v6(const char *str); + +bool is_address_any(const struct sockaddr *psa); +bool same_net(const struct sockaddr *ip1, + const struct sockaddr *ip2, + const struct sockaddr *mask); +bool sockaddr_equal(const struct sockaddr *ip1, + const struct sockaddr *ip2); + +bool is_address_any(const struct sockaddr *psa); +uint16_t get_sockaddr_port(const struct sockaddr_storage *pss); +char *print_sockaddr_len(char *dest, + size_t destlen, + const struct sockaddr *psa, + socklen_t psalen); +char *print_sockaddr(char *dest, + size_t destlen, + const struct sockaddr_storage *psa); +char *print_canonical_sockaddr(TALLOC_CTX *ctx, + const struct sockaddr_storage *pss); +const char *client_name(int fd); +int get_socket_port(int fd); +const char *client_socket_addr(int fd, char *addr, size_t addr_len); + +void set_socket_options(int fd, const char *options); #endif /* _SAMBA_UTIL_NET_H_ */ diff --git a/lib/util/util_paths.c b/lib/util/util_paths.c new file mode 100644 index 0000000000..0baa6801c5 --- /dev/null +++ b/lib/util/util_paths.c @@ -0,0 +1,63 @@ +/* + Unix SMB/CIFS implementation. + Samba utility functions + Copyright (C) Andrew Tridgell 1992-1998 + Copyright (C) Jeremy Allison 2001-2007 + Copyright (C) Simo Sorce 2001 + Copyright (C) Jim McDonough <jmcd@us.ibm.com> 2003 + Copyright (C) James Peach 2006 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "includes.h" +#include "dynconfig/dynconfig.h" + +/** + * @brief Returns an absolute path to a file in the Samba modules directory. + * + * @param name File to find, relative to MODULESDIR. + * + * @retval Pointer to a string containing the full path. + **/ + +char *modules_path(TALLOC_CTX *mem_ctx, const char *name) +{ + return talloc_asprintf(mem_ctx, "%s/%s", get_dyn_MODULESDIR(), name); +} + +/** + * @brief Returns an absolute path to a file in the Samba data directory. + * + * @param name File to find, relative to CODEPAGEDIR. + * + * @retval Pointer to a talloc'ed string containing the full path. + **/ + +char *data_path(TALLOC_CTX *mem_ctx, const char *name) +{ + return talloc_asprintf(mem_ctx, "%s/%s", get_dyn_CODEPAGEDIR(), name); +} + +/** + * @brief Returns the platform specific shared library extension. + * + * @retval Pointer to a const char * containing the extension. + **/ + +const char *shlib_ext(void) +{ + return get_dyn_SHLIBEXT(); +} + diff --git a/lib/util/util_str.c b/lib/util/util_str.c index cf1b07ff0f..388d7887ef 100644 --- a/lib/util/util_str.c +++ b/lib/util/util_str.c @@ -32,87 +32,6 @@ **/ /** - Safe string copy into a known length string. maxlength does not - include the terminating zero. -**/ -_PUBLIC_ char *safe_strcpy(char *dest,const char *src, size_t maxlength) -{ - size_t len; - - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcpy\n")); - return NULL; - } - -#ifdef DEVELOPER - /* We intentionally write out at the extremity of the destination - * string. If the destination is too short (e.g. pstrcpy into mallocd - * or fstring) then this should cause an error under a memory - * checker. 
*/ - dest[maxlength] = '\0'; - if (PTR_DIFF(&len, dest) > 0) { /* check if destination is on the stack, ok if so */ - log_suspicious_usage("safe_strcpy", src); - } -#endif - - if (!src) { - *dest = 0; - return dest; - } - - len = strlen(src); - - if (len > maxlength) { - DEBUG(0,("ERROR: string overflow by %u (%u - %u) in safe_strcpy [%.50s]\n", - (unsigned int)(len-maxlength), (unsigned)len, (unsigned)maxlength, src)); - len = maxlength; - } - - memmove(dest, src, len); - dest[len] = 0; - return dest; -} - -/** - Safe string cat into a string. maxlength does not - include the terminating zero. -**/ -_PUBLIC_ char *safe_strcat(char *dest, const char *src, size_t maxlength) -{ - size_t src_len, dest_len; - - if (!dest) { - DEBUG(0,("ERROR: NULL dest in safe_strcat\n")); - return NULL; - } - - if (!src) - return dest; - -#ifdef DEVELOPER - if (PTR_DIFF(&src_len, dest) > 0) { /* check if destination is on the stack, ok if so */ - log_suspicious_usage("safe_strcat", src); - } -#endif - src_len = strlen(src); - dest_len = strlen(dest); - - if (src_len + dest_len > maxlength) { - DEBUG(0,("ERROR: string overflow by %d in safe_strcat [%.50s]\n", - (int)(src_len + dest_len - maxlength), src)); - if (maxlength > dest_len) { - memcpy(&dest[dest_len], src, maxlength - dest_len); - } - dest[maxlength] = 0; - return NULL; - } - - memcpy(&dest[dest_len], src, src_len); - dest[dest_len + src_len] = 0; - return dest; -} - -/** format a string into length-prefixed dotted domain format, as used in NBT and in some ADS structures **/ @@ -175,7 +94,7 @@ _PUBLIC_ bool conv_str_bool(const char * str, bool * val) /** * Convert a size specification like 16K into an integral number of bytes. 
**/ -_PUBLIC_ bool conv_str_size(const char * str, uint64_t * val) +_PUBLIC_ bool conv_str_size_error(const char * str, uint64_t * val) { char * end = NULL; unsigned long long lval; @@ -246,6 +165,6 @@ _PUBLIC_ bool strequal(const char *s1, const char *s2) if (!s1 || !s2) return false; - return strcasecmp(s1,s2) == 0; + return strcasecmp_m(s1,s2) == 0; } diff --git a/lib/util/util_tdb.c b/lib/util/util_tdb.c index 4a81678808..02c7095f66 100644 --- a/lib/util/util_tdb.c +++ b/lib/util/util_tdb.c @@ -20,7 +20,7 @@ */ #include "includes.h" -#include <tdb.h> +#include "../lib/tdb_compat/tdb_compat.h" #include "../lib/util/util_tdb.h" /* these are little tdb utility functions that are meant to make @@ -57,7 +57,7 @@ TDB_DATA string_term_tdb_data(const char *string) } /**************************************************************************** - Lock a chain by string. Return -1 if lock failed. + Lock a chain by string. Return non-zero if lock failed. ****************************************************************************/ int tdb_lock_bystring(struct tdb_context *tdb, const char *keyval) @@ -79,7 +79,7 @@ void tdb_unlock_bystring(struct tdb_context *tdb, const char *keyval) } /**************************************************************************** - Read lock a chain by string. Return -1 if lock failed. + Read lock a chain by string. Return non-zero if lock failed. 
****************************************************************************/ int tdb_read_lock_bystring(struct tdb_context *tdb, const char *keyval) @@ -111,7 +111,7 @@ int32_t tdb_fetch_int32_byblob(struct tdb_context *tdb, TDB_DATA key) TDB_DATA data; int32_t ret; - data = tdb_fetch(tdb, key); + data = tdb_fetch_compat(tdb, key); if (!data.dptr || data.dsize != sizeof(int32_t)) { SAFE_FREE(data.dptr); return -1; @@ -133,7 +133,7 @@ int32_t tdb_fetch_int32(struct tdb_context *tdb, const char *keystr) } /**************************************************************************** - Store a int32_t value by an arbitrary blob key, return 0 on success, -1 on failure. + Store a int32_t value by an arbitrary blob key, return 0 on success, -ve on failure. Input is int32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ @@ -150,7 +150,7 @@ int tdb_store_int32_byblob(struct tdb_context *tdb, TDB_DATA key, int32_t v) } /**************************************************************************** - Store a int32_t value by string key, return 0 on success, -1 on failure. + Store a int32_t value by string key, return 0 on success, -ve on failure. Input is int32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ @@ -168,7 +168,7 @@ bool tdb_fetch_uint32_byblob(struct tdb_context *tdb, TDB_DATA key, uint32_t *va { TDB_DATA data; - data = tdb_fetch(tdb, key); + data = tdb_fetch_compat(tdb, key); if (!data.dptr || data.dsize != sizeof(uint32_t)) { SAFE_FREE(data.dptr); return false; @@ -190,7 +190,7 @@ bool tdb_fetch_uint32(struct tdb_context *tdb, const char *keystr, uint32_t *val } /**************************************************************************** - Store a uint32_t value by an arbitrary blob key, return 0 on success, -1 on failure. 
+ Store a uint32_t value by an arbitrary blob key, return true on success, false on failure. Input is uint32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ @@ -204,14 +204,14 @@ bool tdb_store_uint32_byblob(struct tdb_context *tdb, TDB_DATA key, uint32_t val data.dptr = (unsigned char *)&v_store; data.dsize = sizeof(uint32_t); - if (tdb_store(tdb, key, data, TDB_REPLACE) == -1) + if (tdb_store(tdb, key, data, TDB_REPLACE) != 0) ret = false; return ret; } /**************************************************************************** - Store a uint32_t value by string key, return 0 on success, -1 on failure. + Store a uint32_t value by string key, return true on success, false on failure. Input is uint32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ @@ -220,7 +220,7 @@ bool tdb_store_uint32(struct tdb_context *tdb, const char *keystr, uint32_t valu return tdb_store_uint32_byblob(tdb, string_term_tdb_data(keystr), value); } /**************************************************************************** - Store a buffer by a null terminated string key. Return 0 on success, -1 + Store a buffer by a null terminated string key. Return 0 on success, -ve on failure. 
****************************************************************************/ @@ -240,7 +240,7 @@ TDB_DATA tdb_fetch_bystring(struct tdb_context *tdb, const char *keystr) { TDB_DATA key = string_term_tdb_data(keystr); - return tdb_fetch(tdb, key); + return tdb_fetch_compat(tdb, key); } /**************************************************************************** @@ -263,7 +263,7 @@ int32_t tdb_change_int32_atomic(struct tdb_context *tdb, const char *keystr, int int32_t val; int32_t ret = -1; - if (tdb_lock_bystring(tdb, keystr) == -1) + if (tdb_lock_bystring(tdb, keystr) != 0) return -1; if ((val = tdb_fetch_int32(tdb, keystr)) == -1) { @@ -284,7 +284,7 @@ int32_t tdb_change_int32_atomic(struct tdb_context *tdb, const char *keystr, int /* Increment value for storage and return next time */ val += change_val; - if (tdb_store_int32(tdb, keystr, val) == -1) + if (tdb_store_int32(tdb, keystr, val) != 0) goto err_out; ret = 0; @@ -304,7 +304,7 @@ bool tdb_change_uint32_atomic(struct tdb_context *tdb, const char *keystr, uint3 uint32_t val; bool ret = false; - if (tdb_lock_bystring(tdb, keystr) == -1) + if (tdb_lock_bystring(tdb, keystr) != 0) return false; if (!tdb_fetch_uint32(tdb, keystr, &val)) { diff --git a/lib/util/util_tdb.h b/lib/util/util_tdb.h index d2f6648462..2d805d7d20 100644 --- a/lib/util/util_tdb.h +++ b/lib/util/util_tdb.h @@ -1,7 +1,27 @@ +/* + Unix SMB/CIFS implementation. + + tdb utility functions + + Copyright (C) Andrew Tridgell 1992-2006 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + #ifndef _____LIB_UTIL_UTIL_TDB_H__ #define _____LIB_UTIL_UTIL_TDB_H__ - /*************************************************************** Make a TDB_DATA and keep the const warning in one place ****************************************************************/ @@ -11,7 +31,7 @@ TDB_DATA string_tdb_data(const char *string); TDB_DATA string_term_tdb_data(const char *string); /**************************************************************************** - Lock a chain by string. Return -1 if lock failed. + Lock a chain by string. Return non-zero if lock failed. ****************************************************************************/ int tdb_lock_bystring(struct tdb_context *tdb, const char *keyval); @@ -21,7 +41,7 @@ int tdb_lock_bystring(struct tdb_context *tdb, const char *keyval); void tdb_unlock_bystring(struct tdb_context *tdb, const char *keyval); /**************************************************************************** - Read lock a chain by string. Return -1 if lock failed. + Read lock a chain by string. Return non-zero if lock failed. ****************************************************************************/ int tdb_read_lock_bystring(struct tdb_context *tdb, const char *keyval); @@ -43,13 +63,13 @@ int32_t tdb_fetch_int32_byblob(struct tdb_context *tdb, TDB_DATA key); int32_t tdb_fetch_int32(struct tdb_context *tdb, const char *keystr); /**************************************************************************** - Store a int32_t value by an arbitrary blob key, return 0 on success, -1 on failure. + Store a int32_t value by an arbitrary blob key, return 0 on success, -ve on failure. Input is int32_t in native byte order. Output in tdb is in little-endian. 
****************************************************************************/ int tdb_store_int32_byblob(struct tdb_context *tdb, TDB_DATA key, int32_t v); /**************************************************************************** - Store a int32_t value by string key, return 0 on success, -1 on failure. + Store a int32_t value by string key, return 0 on success, -ve on failure. Input is int32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ int tdb_store_int32(struct tdb_context *tdb, const char *keystr, int32_t v); @@ -67,19 +87,19 @@ bool tdb_fetch_uint32_byblob(struct tdb_context *tdb, TDB_DATA key, uint32_t *va bool tdb_fetch_uint32(struct tdb_context *tdb, const char *keystr, uint32_t *value); /**************************************************************************** - Store a uint32_t value by an arbitrary blob key, return 0 on success, -1 on failure. + Store a uint32_t value by an arbitrary blob key, return true on success, false on failure. Input is uint32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ bool tdb_store_uint32_byblob(struct tdb_context *tdb, TDB_DATA key, uint32_t value); /**************************************************************************** - Store a uint32_t value by string key, return 0 on success, -1 on failure. + Store a uint32_t value by string key, return true on success, false on failure. Input is uint32_t in native byte order. Output in tdb is in little-endian. ****************************************************************************/ bool tdb_store_uint32(struct tdb_context *tdb, const char *keystr, uint32_t value); /**************************************************************************** - Store a buffer by a null terminated string key. Return 0 on success, -1 + Store a buffer by a null terminated string key. 
Return 0 on success, -ve on failure. ****************************************************************************/ int tdb_store_bystring(struct tdb_context *tdb, const char *keystr, TDB_DATA data, int flags); @@ -91,7 +111,7 @@ int tdb_store_bystring(struct tdb_context *tdb, const char *keystr, TDB_DATA dat TDB_DATA tdb_fetch_bystring(struct tdb_context *tdb, const char *keystr); /**************************************************************************** - Delete an entry using a null terminated string key. + Delete an entry using a null terminated string key. 0 on success, -ve on err. ****************************************************************************/ int tdb_delete_bystring(struct tdb_context *tdb, const char *keystr); diff --git a/lib/util/wrap_xattr.h b/lib/util/wrap_xattr.h index 64b28d250c..745b93d764 100644 --- a/lib/util/wrap_xattr.h +++ b/lib/util/wrap_xattr.h @@ -1,3 +1,24 @@ +/* + Unix SMB/CIFS implementation. + + POSIX NTVFS backend - xattr support using filesystem xattrs + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + #ifndef __LIB_UTIL_WRAP_XATTR_H__ #define __LIB_UTIL_WRAP_XATTR_H__ diff --git a/lib/util/wscript_build b/lib/util/wscript_build index aad386ef2a..bdc9d10150 100755 --- a/lib/util/wscript_build +++ b/lib/util/wscript_build @@ -1,69 +1,25 @@ #!/usr/bin/env python -common_util_sources = '''talloc_stack.c smb_threads.c xfile.c data_blob.c +bld.SAMBA_LIBRARY('samba-util', + source='''talloc_stack.c smb_threads.c xfile.c data_blob.c util_file.c time.c rbtree.c rfc1738.c select.c genrand.c fsusage.c blocking.c become_daemon.c signal.c system.c params.c util.c util_id.c util_net.c - util_strlist.c idtree.c debug.c fault.c base64.c - util_str_common.c''' - -common_util_headers = 'debug.h' -common_util_public_deps = 'talloc pthread LIBCRYPTO' -s4_util_sources = '''dprintf.c ms_fnmatch.c parmlist.c substitute.c util_str.c''' -s4_util_deps = 'DYNCONFIG' -s4_util_public_deps = 'talloc CHARSET execinfo uid_wrapper' -s4_util_public_headers = 'attr.h byteorder.h data_blob.h memory.h safe_string.h time.h talloc_stack.h xfile.h dlinklist.h util.h' -s4_util_header_path = [ ('dlinklist.h util.h', '.'), ('*', 'util') ] - -if bld.env.enable_s3build or bld.env._SAMBA_BUILD_ == 3: - # as we move files into common between samba-util and samba-util3, move them here. 
- # Both samba-util and samba-util3 depend on this private library - bld.SAMBA_LIBRARY('samba-util-common', - source=common_util_sources, - public_deps=common_util_public_deps, - # until we get all the dependencies in this library in common - # we need to allow this library to be built with unresolved symbols - allow_undefined_symbols=True, - local_include=False, - public_headers=common_util_headers, - header_path= [('*', 'util') ], - private_library=True - ) - - if bld.env._SAMBA_BUILD_ == 4: - bld.SAMBA_LIBRARY('samba-util', - source=s4_util_sources, - deps=s4_util_deps + ' samba-util-common', - public_deps=s4_util_public_deps, - public_headers=s4_util_public_headers, - header_path= s4_util_header_path, - local_include=False, - vnum='0.0.1', - pc_files='samba-util.pc' - ) - -else: - if bld.env._SAMBA_BUILD_ == 4: - bld.SAMBA_LIBRARY('samba-util', - source=s4_util_sources + " " + common_util_sources, - deps=s4_util_deps, - public_deps=s4_util_public_deps + ' ' + common_util_public_deps, - public_headers=s4_util_public_headers + ' ' + common_util_headers, - header_path= s4_util_header_path, - local_include=False, - vnum='0.0.1', - pc_files='samba-util.pc' - ) - - # dummy subsystem for avoid wider deps changes. 
- bld.SAMBA_SUBSYSTEM('samba-util-common', - source=[], - deps='samba-util', - local_include=False,) + util_strlist.c util_paths.c idtree.c debug.c fault.c base64.c + util_str.c util_str_common.c substitute.c ms_fnmatch.c + server_id.c dprintf.c parmlist.c''', + deps='DYNCONFIG', + public_deps='talloc execinfo uid_wrapper pthread LIBCRYPTO CHARSET', + public_headers='debug.h attr.h byteorder.h data_blob.h memory.h safe_string.h time.h talloc_stack.h xfile.h dlinklist.h util.h string_wrappers.h', + header_path= [ ('dlinklist.h util.h', '.'), ('*', 'util') ], + local_include=False, + vnum='0.0.1', + pc_files='samba-util.pc' + ) bld.SAMBA_LIBRARY('asn1util', source='asn1.c', - deps='talloc samba-util-common', + deps='talloc samba-util', private_library=True, local_include=False) @@ -88,7 +44,7 @@ bld.SAMBA_LIBRARY('wrap_xattr', bld.SAMBA_LIBRARY('UTIL_TDB', source='util_tdb.c', local_include=False, - public_deps='tdb talloc', + public_deps='tdb_compat talloc', private_library=True ) @@ -121,3 +77,12 @@ bld.SAMBA_SUBSYSTEM('UTIL_PW', local_include=False, public_deps='talloc' ) + + +bld.SAMBA_LIBRARY('tdb-wrap', + source='tdb_wrap.c', + deps='tdb_compat talloc samba-util', + private_library=True, + local_include=False + ) + |