1 files changed, 725 insertions, 0 deletions
diff --git a/source4/python/py_tdbpack.c b/source4/python/py_tdbpack.c
new file mode 100644
index 0000000000..f0718b717e
--- /dev/null
+++ b/source4/python/py_tdbpack.c
@@ -0,0 +1,725 @@
+/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
+	 
+   Python wrapper for Samba tdb pack/unpack functions
+   Copyright (C) Martin Pool 2002, 2003
+
+
+   NOTE PYTHON STYLE GUIDE
+   http://www.python.org/peps/pep-0007.html
+   
+   
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+   
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+   
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "Python.h"
+
+/* This symbol is used in both config.h and Python.h which causes an
+   annoying compiler warning. */
+
+#ifdef HAVE_FSTAT
+#undef HAVE_FSTAT
+#endif
+
+/* This module is supposed to be standalone, however for portability
+   it would be good to use the FUNCTION_MACRO preprocessor define. */
+
+#include "include/config.h"
+
+#ifdef HAVE_FUNCTION_MACRO
+#define FUNCTION_MACRO  (__FUNCTION__)
+#else
+#define FUNCTION_MACRO  (__FILE__)
+#endif
+
+static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_str(char ch,
+				PyObject *val_iter, PyObject *packed_list,
+				const char *encoding);
+static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
+
+static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
+
+static PyObject *pytdbpack_data(const char *format_str,
+				     PyObject *val_seq,
+				     PyObject *val_list);
+
+static PyObject *
+pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
+
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
+
+
+static PyObject *pytdbpack_bad_type(char ch,
+				    const char *expected,
+				    PyObject *val_obj);
+
+static const char * pytdbpack_docstring =
+"Convert between Python values and Samba binary encodings.
+
+This module is conceptually similar to the standard 'struct' module, but it
+uses both a different binary format and a different description string.
+
+Samba's encoding is based on that used inside DCE-RPC and SMB: a
+little-endian, unpadded, non-self-describing binary format.  It is intended
+that these functions be as similar as possible to the routines in Samba's
+tdb/tdbutil module, with appropriate adjustments for Python datatypes.
+
+Python strings are used to specify the format of data to be packed or
+unpacked.
+
+String encodings are implied by the database format: they may be either DOS
+codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
+to be the same as the default Python encoding).
+
+tdbpack format strings:
+
+    'f': NUL-terminated string in codepage iso8859-1
+   
+    'P': same as 'f'
+
+    'F': NUL-terminated string in iso-8859-1
+
+    'd':  4 byte little-endian unsigned number
+
+    'w':  2 byte little-endian unsigned number
+
+    'P': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is
+          really just an \"exists\" or \"does not exist\" flag.  The boolean
+          value of the Python object is used.
+    
+    'B': 4-byte LE length, followed by that many bytes of binary data.
+         Corresponds to a Python integer giving the length, followed by a byte
+         string of the appropriate length.
+
+    '$': Special flag indicating that the preceding format code should be
+         repeated while data remains.  This is only supported for unpacking.
+
+    Every code corresponds to a single Python object, except 'B' which
+    corresponds to two values (length and contents), and '$', which produces
+    however many make sense.
+";
+
+
+static char const pytdbpack_doc[] = 
+"pack(format, values) -> buffer
+Pack Python objects into Samba binary format according to format string.
+
+arguments:
+    format -- string of tdbpack format characters
+    values -- sequence of value objects corresponding 1:1 to format characters
+
+returns:
+    buffer -- string containing packed data
+
+raises:
+    IndexError -- if there are too few values for the format
+    ValueError -- if any of the format characters is illegal
+    TypeError  -- if the format is not a string, or values is not a sequence,
+        or any of the values is of the wrong type for the corresponding
+        format character
+
+notes:
+    For historical reasons, it is not an error to pass more values than are consumed
+    by the format.
+";
+
+
+static char const pytdbunpack_doc[] =
+"unpack(format, buffer) -> (values, rest)
+Unpack Samba binary data according to format string.
+
+arguments:
+    format -- string of tdbpack characters
+    buffer -- string of packed binary data
+
+returns:
+    2-tuple of:
+        values -- sequence of values corresponding 1:1 to format characters
+        rest -- string containing data that was not decoded, or '' if the
+            whole string was consumed
+
+raises:
+    IndexError -- if there is insufficient data in the buffer for the
+        format (or if the data is corrupt and contains a variable-length
+        field extending past the end)
+    ValueError -- if any of the format characters is illegal
+
+notes:
+    Because unconsumed data is returned, you can feed it back in to the
+    unpacker to extract further fields.  Alternatively, if you wish to modify
+    some fields near the start of the data, you may be able to save time by
+    only unpacking and repacking the necessary part.
+";
+
+
+const char *pytdb_dos_encoding = "cp850";
+
+/* NULL, meaning that the Samba default encoding *must* be the same as the
+   Python default encoding. */
+const char *pytdb_unix_encoding = NULL;
+
+
+/*
+  * Pack objects to bytes.
+  *
+  * All objects are first individually encoded onto a list, and then the list
+  * of strings is concatenated.  This is faster than concatenating strings,
+  * and reasonably simple to code.
+  */
+static PyObject *
+pytdbpack(PyObject *self,
+	       PyObject *args)
+{
+	char *format_str;
+	PyObject *val_seq, *val_iter = NULL,
+		*packed_list = NULL, *packed_str = NULL,
+		*empty_str = NULL;
+
+	/* TODO: Test passing wrong types or too many arguments */
+	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
+		return NULL;
+
+	if (!(val_iter = PyObject_GetIter(val_seq)))
+		goto out;
+
+	/* Create list to hold strings until we're done, then join them all. */
+	if (!(packed_list = PyList_New(0)))
+		goto out;
+
+	if (!pytdbpack_data(format_str, val_iter, packed_list))
+		goto out;
+
+	/* this function is not officially documented but it works */
+	if (!(empty_str = PyString_InternFromString("")))
+		goto out;
+	
+	packed_str = _PyString_Join(empty_str, packed_list);
+
+  out:
+	Py_XDECREF(empty_str);
+	Py_XDECREF(val_iter);
+	Py_XDECREF(packed_list);
+
+	return packed_str;
+}
+
+
+/*
+  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
+  PACKED_BUF.
+
+  The string has already been checked out, so we know that VAL_SEQ is large
+  enough to hold the packed data, and that there are enough value items.
+  (However, their types may not have been thoroughly checked yet.)
+
+  In addition, val_seq is a Python Fast sequence.
+
+  Returns NULL for error (with exception set), or None.
+*/
+PyObject *
+pytdbpack_data(const char *format_str,
+		    PyObject *val_iter,
+		    PyObject *packed_list)
+{
+	int format_i, val_i = 0;
+
+	for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
+		char ch = format_str[format_i];
+
+		switch (ch) {
+			/* dispatch to the appropriate packer for this type,
+			   which should pull things off the iterator, and
+			   append them to the packed_list */
+		case 'w':
+		case 'd':
+		case 'p':
+			if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
+				return NULL;
+			break;
+
+		case 'f':
+		case 'P':
+			if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
+				return NULL;
+			break;
+
+		case 'B':
+			if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
+				return NULL;
+			break;
+
+		default:
+			PyErr_Format(PyExc_ValueError,
+				     "%s: format character '%c' is not supported",
+				     FUNCTION_MACRO, ch);
+			return NULL;
+		}
+	}
+
+	return packed_list;
+}
+
+
+static PyObject *
+pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
+{
+	unsigned long val_long;
+	PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
+	PyObject *new_list = NULL;
+	unsigned char pack_buf[4];
+
+	if (!(val_obj = PyIter_Next(val_iter)))
+		goto out;
+
+	if (!(long_obj = PyNumber_Long(val_obj))) {
+		pytdbpack_bad_type(ch, "Number", val_obj);
+		goto out;
+	}
+
+	val_long = PyLong_AsUnsignedLong(long_obj);
+	pack_le_uint32(val_long, pack_buf);
+
+	/* pack as 32-bit; if just packing a 'w' 16-bit word then only take
+	   the first two bytes. */
+	
+	if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
+		goto out;
+
+	if (PyList_Append(packed_list, result_obj) != -1)
+		new_list = packed_list;
+
+  out:
+	Py_XDECREF(val_obj);
+	Py_XDECREF(long_obj);
+	Py_XDECREF(result_obj);
+
+	return new_list;
+}
+
+
+/*
+ * Take one string from the iterator val_iter, convert it to 8-bit, and return
+ * it.
+ *
+ * If the input is neither a string nor Unicode, an exception is raised.
+ *
+ * If the input is Unicode, then it is converted to the appropriate encoding.
+ *
+ * If the input is a String, and encoding is not null, then it is converted to
+ * Unicode using the default decoding method, and then converted to the
+ * encoding.  If the encoding is NULL, then the string is written out as-is --
+ * this is used when the default Python encoding is the same as the Samba
+ * encoding.
+ *
+ * I hope this approach avoids being too fragile w.r.t. being passed either
+ * Unicode or String objects.
+ */
+static PyObject *
+pytdbpack_str(char ch,
+	      PyObject *val_iter, PyObject *packed_list, const char *encoding)
+{
+	PyObject *val_obj = NULL;
+	PyObject *unicode_obj = NULL;
+	PyObject *coded_str = NULL;
+	PyObject *nul_str = NULL;
+	PyObject *new_list = NULL;
+
+	if (!(val_obj = PyIter_Next(val_iter)))
+		goto out;
+
+	if (PyUnicode_Check(val_obj)) {
+		if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
+			goto out;
+	}
+	else if (PyString_Check(val_obj) && !encoding) {
+		/* For efficiency, we assume that the Python interpreter has
+		   the same default string encoding as Samba's native string
+		   encoding.  On the PSA, both are always 8859-1. */
+		coded_str = val_obj;
+		Py_INCREF(coded_str);
+	}
+	else if (PyString_Check(val_obj)) {
+		/* String, but needs to be converted */
+		if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
+			goto out;
+		if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
+			goto out;
+	}
+	else {
+		pytdbpack_bad_type(ch, "String or Unicode", val_obj);
+		goto out;
+	}
+
+	if (!nul_str)
+		/* this is constant and often-used; hold it forever */
+		if (!(nul_str = PyString_FromStringAndSize("", 1)))
+			goto out;
+
+	if ((PyList_Append(packed_list, coded_str) != -1)
+	    && (PyList_Append(packed_list, nul_str) != -1))
+		new_list = packed_list;
+
+  out:
+	Py_XDECREF(val_obj);
+	Py_XDECREF(unicode_obj);
+	Py_XDECREF(coded_str);
+
+	return new_list;
+}
+
+
+/*
+ * Pack (LENGTH, BUFFER) pair onto the list.
+ *
+ * The buffer must already be a String, not Unicode, because it contains 8-bit
+ * untranslated data.  In some cases it will actually be UTF_16_LE data.
+ */
+static PyObject *
+pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
+{
+	PyObject *val_obj;
+	PyObject *new_list = NULL;
+	
+	/* pull off integer and stick onto list */
+	if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
+		return NULL;
+
+	/* this assumes that the string is the right length; the old code did
+	   the same. */
+	if (!(val_obj = PyIter_Next(val_iter)))
+		return NULL;
+
+	if (!PyString_Check(val_obj)) {
+		pytdbpack_bad_type('B', "String", val_obj);
+		goto out;
+	}
+	
+	if (PyList_Append(packed_list, val_obj) != -1)
+		new_list = packed_list;
+
+  out:
+	Py_XDECREF(val_obj);
+	return new_list;
+}
+
+
+static PyObject *pytdbpack_bad_type(char ch,
+				    const char *expected,
+				    PyObject *val_obj)
+{
+	PyObject *r = PyObject_Repr(val_obj);
+	if (!r)
+		return NULL;
+	PyErr_Format(PyExc_TypeError,
+		     "tdbpack: format '%c' requires %s, not %s",
+		     ch, expected, PyString_AS_STRING(r));
+	Py_DECREF(r);
+	return val_obj;
+}
+
+
+/*
+  XXX: glib and Samba have quicker macro for doing the endianness conversions,
+  but I don't know of one in plain libc, and it's probably not a big deal.  I
+  realize this is kind of dumb because we'll almost always be on x86, but
+  being safe is important.
+*/
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
+{
+	pbuf[0] =         val_long & 0xff;
+	pbuf[1] = (val_long >> 8)  & 0xff;
+	pbuf[2] = (val_long >> 16) & 0xff;
+	pbuf[3] = (val_long >> 24) & 0xff;
+}
+
+
+static void pack_bytes(long len, const char *from,
+		       unsigned char **pbuf)
+{
+	memcpy(*pbuf, from, len);
+	(*pbuf) += len;
+}
+
+
+
+static PyObject *
+pytdbunpack(PyObject *self,
+		 PyObject *args)
+{
+	char *format_str, *packed_str, *ppacked;
+	PyObject *val_list = NULL, *ret_tuple = NULL;
+	PyObject *rest_string = NULL;
+	int format_len, packed_len;
+	char last_format = '#';	/* invalid */
+	int i;
+	
+	/* get arguments */
+	if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
+		return NULL;
+
+	format_len = strlen(format_str);
+	
+	/* Allocate list to hold results.  Initially empty, and we append
+	   results as we go along. */
+	val_list = PyList_New(0);
+	if (!val_list)
+		goto failed;
+	ret_tuple = PyTuple_New(2);
+	if (!ret_tuple)
+		goto failed;
+	
+	/* For every object, unpack.  */
+	for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
+		last_format = format_str[i];
+		/* packed_len is reduced in place */
+		if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
+			goto failed;
+	}
+
+	/* If the last character was '$', keep going until out of space */
+	if (format_str[i] == '$') {
+		if (i == 0) {
+			PyErr_Format(PyExc_ValueError,
+				     "%s: '$' may not be first character in format",
+				     FUNCTION_MACRO);
+			return NULL;
+		} 
+		while (packed_len > 0)
+			if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
+				goto failed;
+	}
+	
+	/* save leftovers for next time */
+	rest_string = PyString_FromStringAndSize(ppacked, packed_len);
+	if (!rest_string)
+		goto failed;
+
+	/* return (values, rest) tuple; give up references to them */
+	PyTuple_SET_ITEM(ret_tuple, 0, val_list);
+	val_list = NULL;
+	PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
+	val_list = NULL;
+	return ret_tuple;
+
+  failed:
+	/* handle failure: deallocate anything.  XDECREF forms handle NULL
+	   pointers for objects that haven't been allocated yet. */
+	Py_XDECREF(val_list);
+	Py_XDECREF(ret_tuple);
+	Py_XDECREF(rest_string);
+	return NULL;
+}
+
+
+static void
+pytdbunpack_err_too_short(void)
+{
+	PyErr_Format(PyExc_IndexError,
+		     "%s: data too short for unpack format", FUNCTION_MACRO);
+}
+
+
+static PyObject *
+pytdbunpack_uint32(char **pbuf, int *plen)
+{
+	unsigned long v;
+	unsigned char *b;
+	
+	if (*plen < 4) {
+		pytdbunpack_err_too_short();
+		return NULL;
+	}
+
+	b = *pbuf;
+	v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
+	
+	(*pbuf) += 4;
+	(*plen) -= 4;
+
+	return PyLong_FromUnsignedLong(v);
+}
+
+
+static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
+{
+	long v;
+	unsigned char *b;
+	
+	if (*plen < 2) {
+		pytdbunpack_err_too_short();
+		return NULL;
+	}
+
+	b = *pbuf;
+	v = b[0] | b[1]<<8;
+	
+	(*pbuf) += 2;
+	(*plen) -= 2;
+
+	return PyInt_FromLong(v);
+}
+
+
+static PyObject *
+pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
+{
+	int len;
+	char *nul_ptr, *start;
+
+	start = *pbuf;
+	
+	nul_ptr = memchr(start, '\0', *plen);
+	if (!nul_ptr) {
+		pytdbunpack_err_too_short();
+		return NULL;
+	}
+
+	len = nul_ptr - start;
+
+	*pbuf += len + 1;	/* skip \0 */
+	*plen -= len + 1;
+
+	return PyString_Decode(start, len, encoding, NULL);
+}
+
+
+static PyObject *
+pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
+{
+	/* first get 32-bit len */
+	long slen;
+	unsigned char *b;
+	unsigned char *start;
+	PyObject *str_obj = NULL, *len_obj = NULL;
+	
+	if (*plen < 4) {
+		pytdbunpack_err_too_short();
+		return NULL;
+	}
+	
+	b = *pbuf;
+	slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
+
+	if (slen < 0) { /* surely you jest */
+		PyErr_Format(PyExc_ValueError,
+			     "%s: buffer seems to have negative length", FUNCTION_MACRO);
+		return NULL;
+	}
+
+	(*pbuf) += 4;
+	(*plen) -= 4;
+	start = *pbuf;
+
+	if (*plen < slen) {
+		PyErr_Format(PyExc_IndexError,
+			     "%s: not enough data to unpack buffer: "
+			     "need %d bytes, have %d", FUNCTION_MACRO,
+			     (int) slen, *plen);
+		return NULL;
+	}
+
+	(*pbuf) += slen;
+	(*plen) -= slen;
+
+	if (!(len_obj = PyInt_FromLong(slen)))
+		goto failed;
+
+	if (PyList_Append(val_list, len_obj) == -1)
+		goto failed;
+	
+	if (!(str_obj = PyString_FromStringAndSize(start, slen)))
+		goto failed;
+	
+	if (PyList_Append(val_list, str_obj) == -1)
+		goto failed;
+	
+	Py_DECREF(len_obj);
+	Py_DECREF(str_obj);
+	
+	return val_list;
+
+  failed:
+	Py_XDECREF(len_obj);	/* handles NULL */
+	Py_XDECREF(str_obj);
+	return NULL;
+}
+
+
+/* Unpack a single field from packed data, according to format character CH.
+   Remaining data is at *PBUF, of *PLEN.
+
+   *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
+   been consumed.
+
+   Returns a reference to None, or NULL for failure.
+*/
+static PyObject *pytdbunpack_item(char ch,
+				  char **pbuf,
+				  int *plen,
+				  PyObject *val_list)
+{
+	PyObject *unpacked;
+	
+	if (ch == 'w') {	/* 16-bit int */
+		unpacked = pytdbunpack_int16(pbuf, plen);
+	}
+	else if (ch == 'd' || ch == 'p') { /* 32-bit int */
+		/* pointers can just come through as integers */
+		unpacked = pytdbunpack_uint32(pbuf, plen);
+	}
+	else if (ch == 'f' || ch == 'P') { /* nul-term string  */
+		unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
+	}
+	else if (ch == 'B') { /* length, buffer */
+		return pytdbunpack_buffer(pbuf, plen, val_list);
+	}
+	else {
+		PyErr_Format(PyExc_ValueError,
+			     "%s: format character '%c' is not supported", 
+                             FUNCTION_MACRO, ch);
+		
+		return NULL;
+	}
+
+	/* otherwise OK */
+	if (!unpacked)
+		return NULL;
+
+	if (PyList_Append(val_list, unpacked) == -1)
+		val_list = NULL;
+
+	/* PyList_Append takes a new reference to the inserted object.
+	   Therefore, we no longer need the original reference. */
+	Py_DECREF(unpacked);
+	
+	return val_list;
+}
+
+
+
+
+
+
+static PyMethodDef pytdbpack_methods[] = {
+	{ "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
+	{ "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
+};
+
+DL_EXPORT(void)
+inittdbpack(void)
+{
+	Py_InitModule3("tdbpack", pytdbpack_methods,
+		       (char *) pytdbpack_docstring);
+}