summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin Pool <mbp@samba.org>2002-09-09 06:30:48 +0000
committerMartin Pool <mbp@samba.org>2002-09-09 06:30:48 +0000
commit63f411a3f90be3d2b1c0d8cf5af394f1163319c5 (patch)
tree9ee7932e927e65f74e0a9f3aa4b3e850ab0a77a4
parent3245349610a999a2eb0b4e388ad16775d7b54e83 (diff)
downloadsamba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.gz
samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.bz2
samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.zip
Import my code to do reasonably fast tdbpack/unpack from Python
(This used to be commit 1f7ed8bb863fdacd0b9f3bc2e1e5d72ec7051feb)
-rw-r--r--source3/python/py_tdbpack.c662
1 files changed, 662 insertions, 0 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c
new file mode 100644
index 0000000000..e5044943be
--- /dev/null
+++ b/source3/python/py_tdbpack.c
@@ -0,0 +1,662 @@
+/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
+
+ Python wrapper for Samba tdb pack/unpack functions
+ Copyright (C) Martin Pool 2002
+
+
+ NOTE PYTHON STYLE GUIDE
+ http://www.python.org/peps/pep-0007.html
+
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+
+#include "Python.h"
+/* Forward declarations for helpers that are defined further down in this
+   file but are called by functions that appear before them. */
+static int pytdbpack_calc_reqd_len(char *format_str,
+ PyObject *val_seq);
+/* Unpacking of one field; the first (unnamed) parameter is the format code. */
+static PyObject *pytdbpack_unpack_item(char,
+ char **pbuf,
+ int *plen);
+static int
+pytdbpack_calc_item_len(char format_ch,
+ PyObject *val_obj);
+/* Second pass of packing: writes the values into a pre-sized buffer. */
+static PyObject *pytdbpack_pack_data(const char *format_str,
+ PyObject *val_seq,
+ unsigned char *buf);
+
+
+
+/* Module documentation string; inittdbpack() passes it to Py_InitModule3.
+
+   It is written as adjacent string literals with explicit "\n" escapes,
+   because a raw newline inside a string literal is not valid ISO C and is
+   rejected by current compilers.
+
+   The text describes the codes as the code in this file implements them:
+   lower-case 'p' is the "pointer" flag (upper-case 'P' is a string), and
+   'B' maps to one Python string whose length prefix is generated from the
+   string itself. */
+static const char * pytdbpack_docstring =
+"Convert between Python values and Samba binary encodings.\n"
+"\n"
+"This module is conceptually similar to the standard 'struct' module, but it\n"
+"uses both a different binary format and a different description string.\n"
+"\n"
+"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
+"little-endian, unpadded, non-self-describing binary format. It is intended\n"
+"that these functions be as similar as possible to the routines in Samba's\n"
+"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
+"\n"
+"Python strings are used to specify the format of data to be packed or\n"
+"unpacked.\n"
+"\n"
+"Strings in TDBs are typically stored in DOS codepages. The caller of this\n"
+"module must make appropriate translations if necessary, typically to and from\n"
+"Unicode objects.\n"
+"\n"
+"tdbpack format strings:\n"
+"\n"
+" 'f': NULL-terminated string in DOS codepage\n"
+"\n"
+" 'P': same as 'f'\n"
+"\n"
+" 'd': 4 byte little-endian number\n"
+"\n"
+" 'w': 2 byte little-endian number\n"
+"\n"
+" 'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
+" really just an \"exists\" or \"does not exist\" flag. The boolean\n"
+" value of the Python object is used.\n"
+"\n"
+" 'B': 4-byte LE length, followed by that many bytes of binary data.\n"
+" Corresponds to a Python byte string of the appropriate length.\n"
+"\n"
+" '$': Special flag indicating that the preceding format code should be\n"
+" repeated while data remains. This is only supported for unpacking.\n"
+"\n"
+" Every code corresponds to a single Python object (for 'B' the length\n"
+" prefix is derived from the string itself), except '$', which produces\n"
+" however many make sense.\n"
+"";
+
+
+/* Docstring for tdbpack.pack(); referenced from the method table.
+
+   Spelled as adjacent literals with explicit "\n" escapes because a raw
+   newline inside a string literal is not valid ISO C.  The text itself is
+   unchanged. */
+static char const pytdbpack_pack_doc[] =
+"pack(format, values) -> buffer\n"
+"Pack Python objects into Samba binary format according to format string.\n"
+"\n"
+"arguments:\n"
+" format -- string of tdbpack format characters\n"
+" values -- sequence of value objects corresponding 1:1 to format characters\n"
+"\n"
+"returns:\n"
+" buffer -- string containing packed data\n"
+"\n"
+"raises:\n"
+" IndexError -- if there are not the same number of format codes as of\n"
+" values\n"
+" ValueError -- if any of the format characters is illegal\n"
+" TypeError -- if the format is not a string, or values is not a sequence,\n"
+" or any of the values is of the wrong type for the corresponding\n"
+" format character\n"
+"";
+
+
+/* Docstring for tdbpack.unpack(); referenced from the method table.
+
+   Spelled as adjacent literals with explicit "\n" escapes because a raw
+   newline inside a string literal is not valid ISO C.  The text itself is
+   unchanged. */
+static char const pytdbpack_unpack_doc[] =
+"unpack(format, buffer) -> (values, rest)\n"
+"Unpack Samba binary data according to format string.\n"
+"\n"
+"arguments:\n"
+" format -- string of tdbpack characters\n"
+" buffer -- string of packed binary data\n"
+"\n"
+"returns:\n"
+" 2-tuple of:\n"
+" values -- sequence of values corresponding 1:1 to format characters\n"
+" rest -- string containing data that was not decoded, or '' if the\n"
+" whole string was consumed\n"
+"\n"
+"raises:\n"
+" IndexError -- if there is insufficient data in the buffer for the\n"
+" format (or if the data is corrupt and contains a variable-length\n"
+" field extending past the end)\n"
+" ValueError -- if any of the format characters is illegal\n"
+"\n"
+"notes:\n"
+" Because unconsumed data is returned, you can feed it back in to the\n"
+" unpacker to extract further fields. Alternatively, if you wish to modify\n"
+" some fields near the start of the data, you may be able to save time by\n"
+" only unpacking and repacking the necessary part.\n"
+"";
+
+
+
+/*
+ pack(format, values) -> buffer
+
+ Python-callable entry point.  Works in two passes: first walk the values
+ to calculate the total packed length, then allocate a temporary C buffer
+ of that size, fill it in, and copy it into a new Python string.
+
+ Arguments (from the Python argument tuple):
+   format -- C string of format characters
+   values -- any sequence; it is converted with PySequence_Fast so that the
+             helpers can index it cheaply
+
+ Returns a new reference to the packed string, or NULL with a Python
+ exception set.
+
+ Reference handling: the items of the sequence are only borrowed.  The
+ object returned by PySequence_Fast is a new reference that this function
+ owns, so it is released on every exit path.
+
+ This design causes an unnecessary copying of the data when Python
+ constructs the string object; a Buffer object of some kind might avoid
+ that.
+ */
+static PyObject *
+pytdbpack_pack(PyObject *self,
+               PyObject *args)
+{
+    char *format_str;
+    PyObject *val_seq, *fast_seq;
+    PyObject *buf_str = NULL;
+    int reqd_len;
+    char *packed_buf = NULL;
+
+    /* TODO: Test passing wrong types or too many arguments */
+    if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
+        return NULL;
+
+    /* Convert into a list or tuple (if not already one), so that we can
+     * index more easily.  The message is a plain literal: __FUNCTION__ is
+     * not a string literal and cannot be concatenated with one. */
+    fast_seq = PySequence_Fast(val_seq,
+                               "pytdbpack_pack: argument 2 must be sequence");
+    if (!fast_seq)
+        return NULL;
+
+    reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
+    if (reqd_len == -1)         /* exception was thrown */
+        goto out;
+
+    /* An empty format legitimately needs 0 bytes, but malloc(0) is allowed
+     * to return NULL; always ask for at least one byte so that NULL can
+     * only mean out-of-memory. */
+    packed_buf = malloc(reqd_len ? reqd_len : 1);
+    if (!packed_buf) {
+        PyErr_Format(PyExc_MemoryError,
+                     "%s: couldn't allocate %d bytes for packed buffer",
+                     __FUNCTION__, reqd_len);
+        goto out;
+    }
+
+    if (!pytdbpack_pack_data(format_str, fast_seq,
+                             (unsigned char *) packed_buf))
+        goto out;
+
+    /* NULL here (allocation failure) simply propagates to the caller. */
+    buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
+
+ out:
+    free(packed_buf);           /* get rid of tmp buf; free(NULL) is fine */
+    Py_DECREF(fast_seq);
+    return buf_str;
+}
+
+
+
+/*
+ unpack(format, buffer) -> (values, rest)
+
+ Python-callable entry point.  Decodes one value per format character
+ from the packed buffer into a list, and returns that list together with
+ a string holding whatever bytes were not consumed.
+
+ Returns a new reference to the 2-tuple, or NULL with an exception set.
+
+ NOTE(review): a '$' in the format re-uses the preceding format code for
+ exactly one more value; the module docstring describes it as repeating
+ "while data remains" -- confirm which behaviour is intended.
+ */
+static PyObject *
+pytdbpack_unpack(PyObject *self,
+                 PyObject *args)
+{
+    char *format_str, *packed_str;
+    char *cursor;
+    int packed_len;
+    int n_codes, idx;
+    char prev_code = '#';
+    PyObject *values = NULL;
+    PyObject *result = NULL;
+    PyObject *leftover = NULL;
+
+    if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
+        return NULL;
+
+    n_codes = strlen(format_str);
+
+    /* One list slot per format character, plus the tuple to return. */
+    values = PyList_New(n_codes);
+    if (values == NULL)
+        goto failed;
+    result = PyTuple_New(2);
+    if (result == NULL)
+        goto failed;
+
+    cursor = packed_str;
+    idx = 0;
+    while (idx < n_codes) {
+        char code = format_str[idx];
+        PyObject *item;
+
+        if (code == '$') {
+            if (idx == 0) {
+                PyErr_Format(PyExc_ValueError,
+                             "%s: '$' may not be first character in format",
+                             __FUNCTION__);
+                goto failed;
+            }
+            code = prev_code;   /* repeat */
+        }
+
+        /* cursor and packed_len are updated in place by the callee */
+        item = pytdbpack_unpack_item(code, &cursor, &packed_len);
+        if (item == NULL)
+            goto failed;
+
+        /* the list takes over our reference to the item */
+        PyList_SET_ITEM(values, idx, item);
+        prev_code = code;
+        idx++;
+    }
+
+    /* whatever was not decoded goes back to the caller */
+    leftover = PyString_FromStringAndSize(cursor, packed_len);
+    if (leftover == NULL)
+        goto failed;
+
+    /* the tuple takes over both references */
+    PyTuple_SET_ITEM(result, 0, values);
+    PyTuple_SET_ITEM(result, 1, leftover);
+    return result;
+
+ failed:
+    Py_XDECREF(values);
+    Py_XDECREF(result);
+    Py_XDECREF(leftover);
+    return NULL;
+}
+
+
+/*
+ Internal routine that calculates how many bytes will be required to
+ encode the values in the format.
+
+ Also checks that the value list is the right size for the format list.
+
+ Arguments:
+
+   format_str: NUL-terminated string of format characters
+   val_seq: a Fast Sequence (list or tuple), being all the values
+
+ Returns number of bytes (may be 0), or -1 if there's something wrong, in
+ which case a Python exception has been raised: IndexError if the number
+ of values does not match the number of format characters, OverflowError
+ if the total does not fit in an int, or whatever
+ pytdbpack_calc_item_len raised for an individual item.
+
+ The running total is checked before each addition, because the result is
+ used to size the output buffer and a wrapped-around total would make
+ that buffer too small.  INT_MAX comes from <limits.h>, which Python.h
+ includes.
+*/
+static int
+pytdbpack_calc_reqd_len(char *format_str,
+                        PyObject *val_seq)
+{
+    int len = 0;
+    char *p;
+    int val_i;
+    int val_len;
+
+    val_len = PySequence_Fast_GET_SIZE(val_seq);
+
+    for (p = format_str, val_i = 0; *p; p++, val_i++) {
+        char ch = *p;
+        PyObject *val_obj;
+        int item_len;
+
+        if (val_i >= val_len) {
+            PyErr_Format(PyExc_IndexError,
+                         "samba.tdbpack.pack: value list is too short for format string");
+            return -1;
+        }
+
+        /* borrow a reference to the item */
+        val_obj = PySequence_Fast_GET_ITEM(val_seq, val_i);
+        if (!val_obj)
+            return -1;
+
+        item_len = pytdbpack_calc_item_len(ch, val_obj);
+        if (item_len == -1)     /* exception already set */
+            return -1;
+
+        /* Any other negative value means the per-item size itself
+           wrapped; treat it like an overflowing total. */
+        if (item_len < 0 || item_len > INT_MAX - len) {
+            PyErr_Format(PyExc_OverflowError,
+                         "%s: packed data would be larger than %d bytes",
+                         __FUNCTION__, INT_MAX);
+            return -1;
+        }
+
+        len += item_len;
+    }
+
+    /* Format string exhausted; any values left over are an error too. */
+    if (val_i != val_len) {
+        PyErr_Format(PyExc_IndexError,
+                     "%s: value list is wrong length for format string",
+                     __FUNCTION__);
+        return -1;
+    }
+
+    return len;
+}
+
+
+/*
+ Calculate the number of bytes required to pack a single value.
+
+ Arguments:
+   ch: the format character for this value
+   val_obj: the value (borrowed reference)
+
+ Returns the packed size in bytes, or -1 with a Python exception set:
+ TypeError if the value has the wrong type for the format character,
+ ValueError if the format character is not recognised.
+
+ Sizes: 'w' is 2 bytes; 'd' and 'p' are 4 bytes; 'f' and 'P' are the
+ string length plus one terminating nul; 'B' is the string length plus a
+ 4-byte length prefix.  'p' accepts any object, since only its truth
+ value is packed.
+*/
+static int
+pytdbpack_calc_item_len(char ch,
+                        PyObject *val_obj)
+{
+    if (ch == 'd' || ch == 'w') {
+        if (!PyInt_Check(val_obj)) {
+            PyErr_Format(PyExc_TypeError,
+                         "tdbpack: format '%c' requires an Int",
+                         ch);
+            return -1;
+        }
+        if (ch == 'w')
+            return 2;
+        else
+            return 4;
+    } else if (ch == 'p') {
+        return 4;
+    }
+    else if (ch == 'f' || ch == 'P' || ch == 'B') {
+        /* all three are carried by an 8-bit Python string */
+        if (!PyString_Check(val_obj)) {
+            PyErr_Format(PyExc_TypeError,
+                         "tdbpack: format '%c' requires a String",
+                         ch);
+            return -1;
+        }
+
+        if (ch == 'B') {
+            /* byte buffer; just use Python string's length, plus
+               a preceding word */
+            return 4 + PyString_GET_SIZE(val_obj);
+        }
+        else {
+            /* one nul character */
+            return 1 + PyString_GET_SIZE(val_obj);
+        }
+    }
+    else {
+        /* __FUNCTION__ is passed as an argument: it is not a string
+           literal, so it cannot be pasted onto the format string. */
+        PyErr_Format(PyExc_ValueError,
+                     "%s: format character '%c' is not supported",
+                     __FUNCTION__, ch);
+
+        return -1;
+    }
+}
+
+
+/*
+ Store the low 32 bits of VAL_LONG at *PBUF as four bytes, least
+ significant byte first, and advance *PBUF past them.
+
+ XXX: glib and Samba have quicker macros for the endianness conversion,
+ but there is no obvious one in plain libc.  Doing it byte by byte is
+ independent of the host byte order.
+*/
+static void pack_int32(unsigned long val_long, unsigned char **pbuf)
+{
+    unsigned char *out = *pbuf;
+    int shift;
+
+    for (shift = 0; shift < 32; shift += 8)
+        *out++ = (val_long >> shift) & 0xff;
+
+    *pbuf = out;
+}
+
+
+/* Copy LEN bytes from FROM to *PBUF and advance *PBUF past them.  The
+   caller is responsible for the destination being large enough. */
+static void pack_bytes(long len, const char *from,
+                       unsigned char **pbuf)
+{
+    unsigned char *dest = *pbuf;
+
+    memcpy(dest, from, len);
+    *pbuf = dest + len;
+}
+
+
+/* Raise IndexError to report that the packed data ended before the
+   current field was complete.  Only sets the exception; the caller still
+   has to return its own failure value.
+
+   __FUNCTION__ is passed as a "%s" argument because it is not a string
+   literal and cannot be concatenated with one. */
+static void
+unpack_err_too_short(void)
+{
+    PyErr_Format(PyExc_IndexError,
+                 "%s: data too short for unpack format",
+                 __FUNCTION__);
+}
+
+
+/* Decode a 4-byte little-endian integer from *PBUF.
+
+   On success advances *PBUF by 4, reduces *PLEN by 4 and returns a new
+   Python int.  If fewer than 4 bytes remain, raises IndexError and
+   returns NULL without touching *PBUF or *PLEN.
+
+   The four bytes are interpreted as a signed two's-complement 32-bit
+   value, so a top byte of 0x80 or above yields a negative number.  The
+   value is assembled in an unsigned type and then converted explicitly:
+   shifting a promoted int left into its sign bit is undefined behaviour. */
+static PyObject *
+unpack_int32(char **pbuf, int *plen)
+{
+    const unsigned char *b;
+    unsigned long u;
+    long v;
+
+    if (*plen < 4) {
+        unpack_err_too_short();
+        return NULL;
+    }
+
+    b = (const unsigned char *) *pbuf;
+    u = (unsigned long) b[0]
+        | ((unsigned long) b[1] << 8)
+        | ((unsigned long) b[2] << 16)
+        | ((unsigned long) b[3] << 24);
+
+    /* Map 0x80000000..0xffffffff onto -2^31..-1 without relying on an
+       out-of-range unsigned-to-signed conversion. */
+    if (u & 0x80000000UL)
+        v = -(long) (~u & 0x7fffffffUL) - 1;
+    else
+        v = (long) u;
+
+    (*pbuf) += 4;
+    (*plen) -= 4;
+
+    return PyInt_FromLong(v);
+}
+
+
+/* Decode a 2-byte little-endian integer from *PBUF.
+
+   On success advances *PBUF by 2, reduces *PLEN by 2 and returns a new
+   Python int in the range 0..65535.  If fewer than 2 bytes remain, raises
+   IndexError and returns NULL without touching *PBUF or *PLEN. */
+static PyObject *unpack_int16(char **pbuf, int *plen)
+{
+    const unsigned char *bytes;
+    long value;
+
+    if (*plen < 2) {
+        unpack_err_too_short();
+        return NULL;
+    }
+
+    bytes = (const unsigned char *) *pbuf;
+    value = bytes[0];
+    value |= bytes[1] << 8;
+
+    *pbuf += 2;
+    *plen -= 2;
+
+    return PyInt_FromLong(value);
+}
+
+
+/* Decode a nul-terminated string from *PBUF.
+
+   Looks for the terminating nul within the remaining *PLEN bytes.  On
+   success the string and its terminator are consumed (*PBUF advanced,
+   *PLEN reduced) and a new Python string, without the nul, is returned.
+   If no nul is found, raises IndexError and returns NULL, leaving *PBUF
+   and *PLEN alone. */
+static PyObject *
+unpack_string(char **pbuf, int *plen)
+{
+    char *begin = *pbuf;
+    char *terminator;
+    int text_len, consumed;
+
+    terminator = memchr(begin, '\0', *plen);
+    if (terminator == NULL) {
+        unpack_err_too_short();
+        return NULL;
+    }
+
+    text_len = terminator - begin;
+    consumed = text_len + 1;    /* string plus its \0 */
+
+    *pbuf += consumed;
+    *plen -= consumed;
+
+    return PyString_FromStringAndSize(begin, text_len);
+}
+
+
+/* Decode a 'B' field from *PBUF: a 4-byte little-endian length followed
+   by that many bytes of data.
+
+   On success *PBUF is advanced and *PLEN reduced past both the length
+   word and the data, and a new Python string holding the data is
+   returned.
+
+   Returns NULL with an exception set when:
+     - fewer than 4 bytes remain (IndexError);
+     - the length word has its top bit set, i.e. would be negative as a
+       signed 32-bit value (ValueError);
+     - the length exceeds the remaining data (IndexError).  In this last
+       case the 4-byte length word has already been consumed.
+
+   The length is assembled in an unsigned type: shifting a promoted int
+   left into its sign bit is undefined behaviour.  __FUNCTION__ is passed
+   as a "%s" argument because it cannot be concatenated with a literal. */
+static PyObject *
+unpack_buffer(char **pbuf, int *plen)
+{
+    /* first get 32-bit len */
+    const unsigned char *b;
+    unsigned long ulen;
+    long slen;
+    char *start;
+
+    if (*plen < 4) {
+        unpack_err_too_short();
+        return NULL;
+    }
+
+    b = (const unsigned char *) *pbuf;
+    ulen = (unsigned long) b[0]
+        | ((unsigned long) b[1] << 8)
+        | ((unsigned long) b[2] << 16)
+        | ((unsigned long) b[3] << 24);
+
+    if (ulen & 0x80000000UL) {  /* surely you jest */
+        PyErr_Format(PyExc_ValueError,
+                     "%s: buffer seems to have negative length",
+                     __FUNCTION__);
+        return NULL;
+    }
+    slen = (long) ulen;         /* now known to fit in 31 bits */
+
+    (*pbuf) += 4;
+    (*plen) -= 4;
+    start = *pbuf;
+
+    if (*plen < slen) {
+        PyErr_Format(PyExc_IndexError,
+                     "%s: not enough data to unpack buffer: "
+                     "need %d bytes, have %d",
+                     __FUNCTION__, (int) slen, *plen);
+        return NULL;
+    }
+
+    (*pbuf) += slen;
+    (*plen) -= slen;
+
+    return PyString_FromStringAndSize(start, slen);
+}
+
+
+/* Unpack a single field from packed data, according to format character CH.
+   Remaining data is at *PBUF, of *PLEN.
+
+   *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
+   been consumed.
+
+   Format characters handled: 'w' (16-bit int), 'd' and 'p' (32-bit int),
+   'f' and 'P' (nul-terminated string), 'B' (length-prefixed buffer).
+
+   Returns a reference to the unpacked Python object, or NULL for failure,
+   with an exception set: ValueError for an unknown format character, or
+   whatever the per-type decoder raised.
+*/
+static PyObject *pytdbpack_unpack_item(char ch,
+                                       char **pbuf,
+                                       int *plen)
+{
+    switch (ch) {
+    case 'w':                   /* 16-bit int */
+        return unpack_int16(pbuf, plen);
+
+    case 'd':                   /* 32-bit int */
+    case 'p':                   /* pointers can just come through as integers */
+        return unpack_int32(pbuf, plen);
+
+    case 'f':                   /* nul-term string */
+    case 'P':
+        return unpack_string(pbuf, plen);
+
+    case 'B':                   /* length, buffer */
+        return unpack_buffer(pbuf, plen);
+
+    default:
+        /* __FUNCTION__ is not a string literal, so it is passed as an
+           argument and not pasted onto the format string. */
+        PyErr_Format(PyExc_ValueError,
+                     "%s: format character '%c' is not supported",
+                     __FUNCTION__, ch);
+        return NULL;
+    }
+}
+
+
+
+/*
+ Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
+ and advance the pointer.  Buffer length has been pre-calculated so we are
+ sure that there is enough space.
+
+ The string formats ('f', 'P', 'B') use the unchecked PyString_GET_SIZE /
+ PyString_AS_STRING macros, so the caller must already have verified that
+ VAL_OBJ is a string for those formats.
+
+ Returns NULL with an exception set on failure.  On success returns
+ Py_None as a plain success marker WITHOUT taking a new reference: callers
+ must only test the result against NULL and must not DECREF it.
+*/
+static PyObject *
+pytdbpack_pack_item(char ch,
+                    PyObject *val_obj,
+                    unsigned char **pbuf)
+{
+    if (ch == 'w' || ch == 'd') {
+        unsigned long val_long;
+        long val = PyInt_AsLong(val_obj);
+
+        /* -1 is also a legitimate value, so ask whether an error is
+           actually pending before giving up. */
+        if (val == -1 && PyErr_Occurred())
+            return NULL;
+        val_long = (unsigned long) val;
+
+        if (ch == 'w') {
+            /* 2-byte LE number: low 16 bits only */
+            (*pbuf)[0] = val_long & 0xff;
+            (*pbuf)[1] = (val_long >> 8) & 0xff;
+            (*pbuf) += 2;
+        }
+        else {
+            /* 4-byte LE number */
+            pack_int32(val_long, pbuf);
+        }
+    }
+    else if (ch == 'p') {
+        /* "Pointer" value -- in the subset of DCERPC used by Samba,
+           this is really just an "exists" or "does not exist"
+           flag. */
+        int truth = PyObject_IsTrue(val_obj);
+
+        /* PyObject_IsTrue reports failure as -1; do not pack that as
+           if it were data. */
+        if (truth == -1)
+            return NULL;
+        pack_int32(truth, pbuf);
+    }
+    else if (ch == 'f' || ch == 'P') {
+        int size;
+        char *sval;
+
+        size = PyString_GET_SIZE(val_obj);
+        sval = PyString_AS_STRING(val_obj);
+        pack_bytes(size+1, sval, pbuf); /* include nul */
+    }
+    else if (ch == 'B') {
+        int size;
+        char *sval;
+
+        size = PyString_GET_SIZE(val_obj);
+        pack_int32(size, pbuf);
+        sval = PyString_AS_STRING(val_obj);
+        pack_bytes(size, sval, pbuf); /* do not include nul */
+    }
+    else {
+        /* this ought to be caught while calculating the length, but
+           just in case. */
+        PyErr_Format(PyExc_ValueError,
+                     "%s: format character '%c' is not supported",
+                     __FUNCTION__, ch);
+
+        return NULL;
+    }
+
+    return Py_None;
+}
+
+
+/*
+ Second pass of packing: encode each element of VAL_SEQ according to the
+ matching character of FORMAT_STR, writing the results one after another
+ into PACKED_BUF.
+
+ Preconditions established by the length-calculation pass: PACKED_BUF is
+ large enough to hold the packed data, and VAL_SEQ has one item for every
+ format character.  VAL_SEQ must be a Python Fast sequence (list or
+ tuple).
+
+ Returns NULL for error (with exception set).  On success returns Py_None
+ purely as a non-NULL marker; no new reference is taken.
+*/
+PyObject *
+pytdbpack_pack_data(const char *format_str,
+                    PyObject *val_seq,
+                    unsigned char *packed_buf)
+{
+    const char *code;
+    int pos = 0;
+
+    for (code = format_str; *code != '\0'; code++, pos++) {
+        /* borrowed reference; nothing to release */
+        PyObject *item = PySequence_Fast_GET_ITEM(val_seq, pos);
+
+        if (item == NULL)
+            return NULL;
+
+        /* packed_buf is advanced past whatever was written */
+        if (pytdbpack_pack_item(*code, item, &packed_buf) == NULL)
+            return NULL;
+    }
+
+    return Py_None;
+}
+
+
+
+
+
+/* Method table for the module, passed to Py_InitModule3 by inittdbpack().
+
+   The table must end with an all-NULL sentinel entry: the interpreter is
+   given only a pointer to the first element and walks the array until it
+   finds an entry whose name is NULL. */
+static PyMethodDef pytdbpack_methods[] = {
+    { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+    { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
+    { NULL, NULL, 0, NULL }     /* sentinel */
+};
+
+/* Module initialisation function: registers the "tdbpack" module with its
+   method table and documentation string.  The return value of
+   Py_InitModule3 is not examined. */
+DL_EXPORT(void)
+inittdbpack(void)
+{
+    /* Py_InitModule3 is given a non-const char pointer, hence the cast */
+    char *module_doc = (char *) pytdbpack_docstring;
+
+    Py_InitModule3("tdbpack", pytdbpack_methods, module_doc);
+}