Import my code to do reasonably fast tdbpack/unpack from Python

(This used to be commit 1f7ed8bb863fdacd0b9f3bc2e1e5d72ec7051feb)
author: Martin Pool <mbp@samba.org> 2002-09-09 06:30:48 +0000
committer: Martin Pool <mbp@samba.org> 2002-09-09 06:30:48 +0000
commit: 63f411a3f90be3d2b1c0d8cf5af394f1163319c5 (patch)
tree: 9ee7932e927e65f74e0a9f3aa4b3e850ab0a77a4 /source3/python
parent: 3245349610a999a2eb0b4e388ad16775d7b54e83 (diff)
download: samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.gz
samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.bz2
samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.zip
1 files changed, 662 insertions, 0 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c
new file mode 100644
index 0000000000..e5044943be
--- /dev/null
+++ b/source3/python/py_tdbpack.c
@@ -0,0 +1,662 @@
+/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
+	 
+   Python wrapper for Samba tdb pack/unpack functions
+   Copyright (C) Martin Pool 2002
+
+
+   NOTE PYTHON STYLE GUIDE
+   http://www.python.org/peps/pep-0007.html
+   
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+
+
+#include "Python.h"
+
+/* Forward declarations for helpers that are defined further down. */
+
+/* Length calculation (first pass of pack). */
+static int pytdbpack_calc_reqd_len(char *format_str, PyObject *val_seq);
+static int pytdbpack_calc_item_len(char format_ch, PyObject *val_obj);
+
+/* Encoding (second pass of pack). */
+static PyObject *pytdbpack_pack_data(const char *format_str,
+				     PyObject *val_seq,
+				     unsigned char *buf);
+
+/* Decoding of a single field. */
+static PyObject *pytdbpack_unpack_item(char format_ch,
+				       char **pbuf,
+				       int *plen);
+
+
+	
+/* Module docstring, handed to Py_InitModule3() by inittdbpack().
+
+   Written as one string literal per line: a raw newline inside a single
+   literal is not valid ISO C and is rejected by current compilers. */
+static const char * pytdbpack_docstring =
+"Convert between Python values and Samba binary encodings.\n"
+"\n"
+"This module is conceptually similar to the standard 'struct' module, but it\n"
+"uses both a different binary format and a different description string.\n"
+"\n"
+"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
+"little-endian, unpadded, non-self-describing binary format.  It is intended\n"
+"that these functions be as similar as possible to the routines in Samba's\n"
+"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
+"\n"
+"Python strings are used to specify the format of data to be packed or\n"
+"unpacked.\n"
+"\n"
+"Strings in TDBs are typically stored in DOS codepages.  The caller of this\n"
+"module must make appropriate translations if necessary, typically to and from\n"
+"Unicode objects.\n"
+"\n"
+"tdbpack format strings:\n"
+"\n"
+"    'f':  NULL-terminated string in DOS codepage\n"
+"\n"
+"    'P':  same as 'f'\n"
+"\n"
+"    'd':  4 byte little-endian number\n"
+"\n"
+"    'w':  2 byte little-endian number\n"
+"\n"
+"    'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
+"          really just an \"exists\" or \"does not exist\" flag.  The boolean\n"
+"          value of the Python object is used.\n"
+"    \n"
+"    'B': 4-byte LE length, followed by that many bytes of binary data.\n"
+"         Corresponds to a Python byte string of the appropriate length.\n"
+"\n"
+"    '$': Special flag indicating that the preceding format code should be\n"
+"         repeated while data remains.  This is only supported for unpacking.\n"
+"\n"
+"    Every code corresponds to a single Python object ('B' is a single\n"
+"    string; its length prefix is derived from the string), except '$',\n"
+"    which produces however many make sense.\n";
+
+
+/* Docstring for tdbpack.pack(), referenced from the method table.
+
+   One string literal per line, since a raw newline inside a literal is
+   not valid ISO C. */
+static char const pytdbpack_pack_doc[] =
+"pack(format, values) -> buffer\n"
+"Pack Python objects into Samba binary format according to format string.\n"
+"\n"
+"arguments:\n"
+"    format -- string of tdbpack format characters\n"
+"    values -- sequence of value objects corresponding 1:1 to format characters\n"
+"\n"
+"returns:\n"
+"    buffer -- string containing packed data\n"
+"\n"
+"raises:\n"
+"    IndexError -- if there are not the same number of format codes as of\n"
+"        values\n"
+"    ValueError -- if any of the format characters is illegal\n"
+"    TypeError  -- if the format is not a string, or values is not a sequence,\n"
+"        or any of the values is of the wrong type for the corresponding\n"
+"        format character\n";
+
+
+/* Docstring for tdbpack.unpack(), referenced from the method table.
+
+   One string literal per line, since a raw newline inside a literal is
+   not valid ISO C. */
+static char const pytdbpack_unpack_doc[] =
+"unpack(format, buffer) -> (values, rest)\n"
+"Unpack Samba binary data according to format string.\n"
+"\n"
+"arguments:\n"
+"    format -- string of tdbpack characters\n"
+"    buffer -- string of packed binary data\n"
+"\n"
+"returns:\n"
+"    2-tuple of:\n"
+"        values -- sequence of values corresponding 1:1 to format characters\n"
+"        rest -- string containing data that was not decoded, or '' if the\n"
+"            whole string was consumed\n"
+"\n"
+"raises:\n"
+"    IndexError -- if there is insufficient data in the buffer for the\n"
+"        format (or if the data is corrupt and contains a variable-length\n"
+"        field extending past the end)\n"
+"    ValueError -- if any of the format characters is illegal\n"
+"\n"
+"notes:\n"
+"    Because unconsumed data is returned, you can feed it back in to the\n"
+"    unpacker to extract further fields.  Alternatively, if you wish to modify\n"
+"    some fields near the start of the data, you may be able to save time by\n"
+"    only unpacking and repacking the necessary part.\n";
+
+
+
+/*
+  pack(format, values) -> string
+
+  Game plan is to first of all walk through the arguments and calculate the
+  total length that will be required.  We allocate a Python string of that
+  size, then walk through again and fill it in place, so no temporary
+  buffer or extra copy is needed.
+
+  ARGS is borrowed.  PySequence_Fast() hands back a NEW reference to the
+  list/tuple view of the values, which must be released on every exit
+  path; that is why all exits after that point go through "out".
+
+  Returns a new reference to the packed string, or NULL with a Python
+  exception set.
+ */
+static PyObject *
+pytdbpack_pack(PyObject *self,
+	       PyObject *args)
+{
+	char *format_str;
+	PyObject *val_seq, *fast_seq;
+	PyObject *buf_str = NULL;
+	int reqd_len;
+
+	/* TODO: Test passing wrong types or too many arguments */
+	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
+		return NULL;
+
+	/* Convert into a list or tuple (if not already one), so that we can
+	 * index more easily.  The message must be a plain literal:
+	 * __FUNCTION__ is not a string literal and cannot be pasted onto
+	 * one. */
+	fast_seq = PySequence_Fast(val_seq,
+				   "pytdbpack_pack: argument 2 must be sequence");
+	if (!fast_seq)
+		return NULL;
+
+	reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
+	if (reqd_len == -1)	/* exception was thrown */
+		goto out;
+
+	/* Allocate the result with uninitialized contents.  This also copes
+	   with reqd_len == 0 (empty format), where malloc(0) could legally
+	   have returned NULL and been mistaken for out-of-memory. */
+	buf_str = PyString_FromStringAndSize(NULL, reqd_len);
+	if (!buf_str)
+		goto out;
+
+	if (!pytdbpack_pack_data(format_str, fast_seq,
+				 (unsigned char *) PyString_AS_STRING(buf_str))) {
+		/* packing failed; exception already set */
+		Py_DECREF(buf_str);
+		buf_str = NULL;
+	}
+
+  out:
+	Py_DECREF(fast_seq);
+	return buf_str;
+}
+
+
+
+/*
+  unpack(format, buffer) -> (values, rest)
+
+  Decodes one value per character of the format string from the packed
+  buffer.  Returns a 2-tuple of the list of decoded values and a string
+  holding whatever bytes were not consumed.
+
+  A '$' in the format decodes ONE more item using the preceding format
+  code; it may not be the first character.
+  NOTE(review): the module docstring describes '$' as repeating "while
+  data remains"; this routine does not loop.
+
+  Returns a new reference, or NULL with a Python exception set.
+ */
+static PyObject *
+pytdbpack_unpack(PyObject *self,
+		 PyObject *args)
+{
+	char *format_str, *packed_str, *ppacked;
+	PyObject *val_list = NULL, *ret_tuple = NULL;
+	PyObject *rest_string = NULL;
+	int format_len, packed_len;
+	int i;
+	char last_format = '#';	/* placeholder; never used as a format */
+
+	/* get arguments */
+	if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
+		return NULL;
+
+	format_len = strlen(format_str);
+
+	/* allocate list to hold results; slots stay NULL until filled, which
+	   the list deallocator tolerates if we bail out half way */
+	val_list = PyList_New(format_len);
+	if (!val_list)
+		goto failed;
+	ret_tuple = PyTuple_New(2);
+	if (!ret_tuple)
+		goto failed;
+
+	/* For every object, unpack.  ppacked/packed_len track the
+	   not-yet-consumed tail of the input. */
+	for (ppacked = packed_str, i = 0; i < format_len; i++) {
+		PyObject *val_obj;
+		char format;
+
+		format = format_str[i];
+		if (format == '$') {
+			if (i == 0) {
+				PyErr_Format(PyExc_ValueError,
+					     "%s: '$' may not be first character in format",
+					     __FUNCTION__);
+				goto failed;
+			}
+			else {
+				format = last_format; /* repeat */
+			}
+		}
+
+		val_obj = pytdbpack_unpack_item(format,
+						&ppacked,
+						&packed_len);
+		if (!val_obj)
+			goto failed;
+
+		/* the list steals our reference to val_obj */
+		PyList_SET_ITEM(val_list, i, val_obj);
+		last_format = format;
+	}
+
+	/* put leftovers in box for lunch tomorrow */
+	rest_string = PyString_FromStringAndSize(ppacked, packed_len);
+	if (!rest_string)
+		goto failed;
+
+	/* return (values, rest) tuple; the tuple steals both references, so
+	   forget our own pointers to them */
+	PyTuple_SET_ITEM(ret_tuple, 0, val_list);
+	val_list = NULL;
+	PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
+	rest_string = NULL;
+	return ret_tuple;
+
+  failed:
+	/* handle failure: deallocate anything */
+	Py_XDECREF(val_list);
+	Py_XDECREF(ret_tuple);
+	Py_XDECREF(rest_string);
+	return NULL;
+}
+
+
+/*
+  Work out how many bytes are needed to encode every value in VAL_SEQ
+  according to FORMAT_STR, one value per format character.
+
+  As a side effect this verifies that the number of values matches the
+  number of format characters.
+
+  VAL_SEQ must be a "fast" sequence (list or tuple).
+
+  Returns the byte count (possibly 0), or -1 with a Python exception set.
+*/
+static int
+pytdbpack_calc_reqd_len(char *format_str,
+			PyObject *val_seq)
+{
+	int total = 0;
+	int idx = 0;
+	int n_vals = PySequence_Fast_GET_SIZE(val_seq);
+
+	while (format_str[idx] != '\0') {
+		PyObject *item;
+		int nbytes;
+
+		/* ran out of values before running out of format codes */
+		if (idx >= n_vals) {
+			PyErr_Format(PyExc_IndexError,
+				     "samba.tdbpack.pack: value list is too short for format string");
+			return -1;
+		}
+
+		/* borrowed reference */
+		item = PySequence_Fast_GET_ITEM(val_seq, idx);
+		if (item == NULL)
+			return -1;
+
+		nbytes = pytdbpack_calc_item_len(format_str[idx], item);
+		if (nbytes == -1)
+			return -1;
+
+		total += nbytes;
+		idx++;
+	}
+
+	/* format exhausted: any values left over are an error too */
+	if (idx != n_vals) {
+		PyErr_Format(PyExc_IndexError,
+			     "%s: value list is wrong length for format string",
+			     __FUNCTION__);
+		return -1;
+	}
+
+	return total;
+}
+
+
+/*
+  Calculate the number of bytes required to pack a single value VAL_OBJ
+  under format character CH, checking the value's type on the way:
+
+    'w'       -> 2 bytes, requires an Int
+    'd'       -> 4 bytes, requires an Int
+    'p'       -> 4 bytes, any object
+    'f', 'P'  -> string length plus one nul, requires a String
+    'B'       -> 4-byte length word plus string length, requires a String
+
+  Returns the size, or -1 with TypeError (wrong value type) or ValueError
+  (unknown format character) set.
+*/
+static int
+pytdbpack_calc_item_len(char ch,
+			PyObject *val_obj)
+{
+	switch (ch) {
+	case 'd':
+	case 'w':
+		if (!PyInt_Check(val_obj)) {
+			PyErr_Format(PyExc_TypeError,
+				     "tdbpack: format '%c' requires an Int",
+				     ch);
+			return -1;
+		}
+		return (ch == 'w') ? 2 : 4;
+
+	case 'p':
+		return 4;
+
+	case 'f':
+	case 'P':
+	case 'B':
+		if (!PyString_Check(val_obj)) {
+			PyErr_Format(PyExc_TypeError,
+				     "tdbpack: format '%c' requires a String",
+				     ch);
+			return -1;
+		}
+
+		if (ch == 'B') {
+			/* byte buffer; just use Python string's length, plus
+			   a preceding word */
+			return 4 + PyString_GET_SIZE(val_obj);
+		}
+
+		/* nul-terminated 8-bit string: one nul character */
+		return 1 + PyString_GET_SIZE(val_obj);
+
+	default:
+		/* __FUNCTION__ is not a string literal, so it is passed as
+		   an argument rather than concatenated onto the format */
+		PyErr_Format(PyExc_ValueError,
+			     "%s: format character '%c' is not supported",
+			     __FUNCTION__, ch);
+		return -1;
+	}
+}
+
+
+/*
+  Store the low 32 bits of VAL_LONG at *PBUF in little-endian byte order
+  and advance *PBUF by four bytes.
+
+  Done a byte at a time so that the result does not depend on the host's
+  endianness.
+*/
+static void pack_int32(unsigned long val_long, unsigned char **pbuf)
+{
+	unsigned char *out = *pbuf;
+	int byte_i;
+
+	for (byte_i = 0; byte_i < 4; byte_i++)
+		out[byte_i] = (val_long >> (8 * byte_i)) & 0xff;
+
+	*pbuf = out + 4;
+}
+
+
+/* Copy LEN bytes from FROM to *PBUF, then advance *PBUF past them. */
+static void pack_bytes(long len, const char *from,
+		       unsigned char **pbuf)
+{
+	unsigned char *dest = *pbuf;
+
+	memcpy(dest, from, len);
+	*pbuf = dest + len;
+}
+
+
+/* Raise IndexError reporting that the packed data ended before the format
+   was satisfied.  The function name is passed as a "%s" argument because
+   __FUNCTION__ is not a string literal and cannot be concatenated. */
+static void
+unpack_err_too_short(void)
+{
+	PyErr_Format(PyExc_IndexError,
+		     "%s: data too short for unpack format",
+		     __FUNCTION__);
+}
+
+
+/*
+  Decode a 4-byte little-endian number from *PBUF and return it as a
+  Python int.  *PBUF is advanced and *PLEN reduced by four.
+
+  The 32 bits are interpreted as a signed two's complement value, so
+  encodings with the top bit set come back negative.
+
+  Returns NULL with IndexError set if fewer than four bytes remain.
+*/
+static PyObject *
+unpack_int32(char **pbuf, int *plen)
+{
+	const unsigned char *b;
+	unsigned long u;
+	long v;
+
+	if (*plen < 4) {
+		unpack_err_too_short();
+		return NULL;
+	}
+
+	b = (const unsigned char *) *pbuf;
+
+	/* Assemble in unsigned arithmetic: shifting a promoted int left
+	   into its sign bit (b[3] << 24) is undefined behaviour. */
+	u = (unsigned long) b[0]
+		| ((unsigned long) b[1] << 8)
+		| ((unsigned long) b[2] << 16)
+		| ((unsigned long) b[3] << 24);
+
+	/* Reinterpret as signed 32-bit without relying on
+	   implementation-defined narrowing conversions. */
+	if (u & 0x80000000UL)
+		v = (long) (u & 0x7fffffffUL) - 0x7fffffffL - 1;
+	else
+		v = (long) u;
+
+	(*pbuf) += 4;
+	(*plen) -= 4;
+
+	return PyInt_FromLong(v);
+}
+
+
+/*
+  Decode a 2-byte little-endian number from *PBUF and return it as a
+  Python int.  *PBUF is advanced and *PLEN reduced by two.
+
+  Returns NULL with IndexError set if fewer than two bytes remain.
+*/
+static PyObject *unpack_int16(char **pbuf, int *plen)
+{
+	const unsigned char *bytes;
+	long value;
+
+	if (*plen < 2) {
+		unpack_err_too_short();
+		return NULL;
+	}
+
+	bytes = (const unsigned char *) *pbuf;
+	value = (long) bytes[0] | ((long) bytes[1] << 8);
+
+	*pbuf += 2;
+	*plen -= 2;
+
+	return PyInt_FromLong(value);
+}
+
+
+/*
+  Decode a nul-terminated string starting at *PBUF, looking no further
+  than *PLEN bytes for the terminator.  The returned Python string does
+  not include the nul; *PBUF and *PLEN are moved past it.
+
+  Returns NULL with IndexError set if no nul is found in the remaining
+  data.
+*/
+static PyObject *
+unpack_string(char **pbuf, int *plen)
+{
+	char *begin = *pbuf;
+	char *terminator = memchr(begin, '\0', *plen);
+	int consumed;
+
+	if (terminator == NULL) {
+		unpack_err_too_short();
+		return NULL;
+	}
+
+	/* bytes eaten from the input: the text plus its trailing \0 */
+	consumed = (terminator - begin) + 1;
+
+	*pbuf = begin + consumed;
+	*plen -= consumed;
+
+	return PyString_FromStringAndSize(begin, consumed - 1);
+}
+
+
+/*
+  Decode a 'B' field: a 4-byte little-endian length followed by that many
+  bytes of data.  Returns the data as a Python string and moves *PBUF /
+  *PLEN past both the length word and the data.
+
+  Returns NULL with an exception set on failure, leaving *PBUF and *PLEN
+  untouched:
+    IndexError -- fewer than 4 bytes remain, or the declared length
+                  exceeds the data that follows
+    ValueError -- the length word has its top bit set (i.e. would be
+                  negative as a signed 32-bit number)
+*/
+static PyObject *
+unpack_buffer(char **pbuf, int *plen)
+{
+	const unsigned char *b;
+	unsigned long ulen;
+	long slen;
+	int avail;
+
+	/* first get 32-bit len */
+	if (*plen < 4) {
+		unpack_err_too_short();
+		return NULL;
+	}
+
+	b = (const unsigned char *) *pbuf;
+
+	/* unsigned arithmetic: b[3] << 24 on a promoted int is undefined
+	   when it reaches the sign bit */
+	ulen = (unsigned long) b[0]
+		| ((unsigned long) b[1] << 8)
+		| ((unsigned long) b[2] << 16)
+		| ((unsigned long) b[3] << 24);
+
+	if (ulen > 0x7fffffffUL) { /* surely you jest */
+		PyErr_Format(PyExc_ValueError,
+			     "%s: buffer seems to have negative length",
+			     __FUNCTION__);
+		return NULL;
+	}
+	slen = (long) ulen;
+
+	/* data available after the length word */
+	avail = *plen - 4;
+
+	if (avail < slen) {
+		PyErr_Format(PyExc_IndexError,
+			     "%s: not enough data to unpack buffer: "
+			     "need %d bytes, have %d",
+			     __FUNCTION__, (int) slen, avail);
+		return NULL;
+	}
+
+	/* only now commit to consuming the field */
+	(*pbuf) += 4 + slen;
+	(*plen) -= 4 + slen;
+
+	return PyString_FromStringAndSize((const char *) b + 4, slen);
+}
+
+
+/* Unpack a single field from packed data, according to format character CH.
+   Remaining data is at *PBUF, of *PLEN.
+
+   *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
+   been consumed.
+
+   Supported codes: 'w' (16-bit int), 'd' and 'p' (32-bit int), 'f' and 'P'
+   (nul-terminated string), 'B' (length-prefixed buffer).
+
+   Returns a reference to the unpacked Python object, or NULL for failure
+   with an exception set (ValueError for an unknown format character).
+*/
+static PyObject *pytdbpack_unpack_item(char ch,
+				       char **pbuf,
+				       int *plen)
+{
+	switch (ch) {
+	case 'w':		/* 16-bit int */
+		return unpack_int16(pbuf, plen);
+
+	case 'd':		/* 32-bit int */
+	case 'p':		/* pointers can just come through as integers */
+		return unpack_int32(pbuf, plen);
+
+	case 'f':		/* nul-term string  */
+	case 'P':
+		return unpack_string(pbuf, plen);
+
+	case 'B':		/* length, buffer */
+		return unpack_buffer(pbuf, plen);
+
+	default:
+		/* __FUNCTION__ is not a string literal, so it goes through
+		   "%s" instead of being concatenated onto the format */
+		PyErr_Format(PyExc_ValueError,
+			     "%s: format character '%c' is not supported",
+			     __FUNCTION__, ch);
+		return NULL;
+	}
+}
+
+
+
+/*
+  Pack a single item VAL_OBJ, encoded using format CH, into a buffer at *PBUF,
+  and advance the pointer.  Buffer length has been pre-calculated so we are
+  sure that there is enough space.
+
+  The string macros used for 'f', 'P' and 'B' do no type checking; they
+  rely on pytdbpack_calc_item_len() having already rejected non-strings.
+
+  Returns Py_None on success and NULL (exception set) on failure.  The
+  Py_None is NOT a new reference: it is only a success marker, and callers
+  must test it against NULL without ever releasing it.
+*/
+static PyObject *
+pytdbpack_pack_item(char ch,
+		    PyObject *val_obj,
+		    unsigned char **pbuf)
+{
+	if (ch == 'w') {
+		/* 2-byte LE number */
+		unsigned long val_long = PyInt_AsLong(val_obj);
+		(*pbuf)[0] = val_long & 0xff;
+		(*pbuf)[1] = (val_long >> 8) & 0xff;
+		(*pbuf) += 2;
+	}
+	else if (ch == 'd') {
+		/* 4-byte LE number */
+		pack_int32(PyInt_AsLong(val_obj), pbuf);
+	}
+	else if (ch == 'p') {
+		/* "Pointer" value -- in the subset of DCERPC used by Samba,
+		   this is really just an "exists" or "does not exist"
+		   flag. */
+		int truth = PyObject_IsTrue(val_obj);
+
+		/* -1 means evaluating the object's truth value raised; do
+		   not encode that as 0xffffffff with an exception pending */
+		if (truth < 0)
+			return NULL;
+		pack_int32(truth, pbuf);
+	}
+	else if (ch == 'f' || ch == 'P') {
+		int size;
+		char *sval;
+
+		size = PyString_GET_SIZE(val_obj);
+		sval = PyString_AS_STRING(val_obj);
+		pack_bytes(size+1, sval, pbuf); /* include nul */
+	}
+	else if (ch == 'B') {
+		int size;
+		char *sval;
+
+		size = PyString_GET_SIZE(val_obj);
+		pack_int32(size, pbuf);
+		sval = PyString_AS_STRING(val_obj);
+		pack_bytes(size, sval, pbuf); /* do not include nul */
+	}
+	else {
+		/* this ought to be caught while calculating the length, but
+		   just in case. */
+		PyErr_Format(PyExc_ValueError,
+			     "%s: format character '%c' is not supported",
+			     __FUNCTION__, ch);
+
+		return NULL;
+	}
+
+	return Py_None;
+}
+
+
+/*
+  Encode the elements of VAL_SEQ into PACKED_BUF, one element per
+  character of FORMAT_STR.
+
+  The caller has already run the length calculation, so PACKED_BUF is
+  known to be big enough and VAL_SEQ is known to hold one value per
+  format character.  VAL_SEQ must be a Python "fast" sequence.
+
+  Returns Py_None (as a plain success marker, not a new reference) or
+  NULL with an exception set.
+
+  Linkage is internal by virtue of the earlier static prototype.
+*/
+PyObject *
+pytdbpack_pack_data(const char *format_str,
+		    PyObject *val_seq,
+		    unsigned char *packed_buf)
+{
+	const char *fmt;
+	int idx = 0;
+
+	for (fmt = format_str; *fmt != '\0'; fmt++, idx++) {
+		/* borrowed reference */
+		PyObject *item = PySequence_Fast_GET_ITEM(val_seq, idx);
+
+		if (item == NULL)
+			return NULL;
+
+		/* packed_buf is moved along by each item that is written */
+		if (pytdbpack_pack_item(*fmt, item, &packed_buf) == NULL)
+			return NULL;
+	}
+
+	return Py_None;
+}
+
+
+
+
+
+/* Method table for the tdbpack module.
+
+   The table must end with an all-NULL sentinel entry: Python walks it
+   until it finds a NULL name, and would otherwise read past the end of
+   the array. */
+static PyMethodDef pytdbpack_methods[] = {
+	{ "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+	{ "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
+	{ NULL, NULL, 0, NULL }	/* sentinel */
+};
+
+/* Module initialization entry point: registers the "tdbpack" module with
+   its method table and module docstring. */
+DL_EXPORT(void)
+inittdbpack(void)
+{
+	char *module_doc = (char *) pytdbpack_docstring;
+
+	Py_InitModule3("tdbpack", pytdbpack_methods, module_doc);
+}
author	Martin Pool <mbp@samba.org>	2002-09-09 06:30:48 +0000
committer	Martin Pool <mbp@samba.org>	2002-09-09 06:30:48 +0000
commit	63f411a3f90be3d2b1c0d8cf5af394f1163319c5 (patch)
tree	9ee7932e927e65f74e0a9f3aa4b3e850ab0a77a4 /source3/python
parent	3245349610a999a2eb0b4e388ad16775d7b54e83 (diff)
download	samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.gz samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.tar.bz2 samba-63f411a3f90be3d2b1c0d8cf5af394f1163319c5.zip