From 9c3446d347236a77b5ad28bcc077c3122e2ef885 Mon Sep 17 00:00:00 2001
From: Tim Potter <tpot@samba.org>
Date: Mon, 11 Nov 2002 05:48:12 +0000
Subject: Sync with HEAD. (This used to be commit
 130c1d0765502be839f87a8d0f4e47f5a3461c97)

---
 source3/python/py_tdbpack.c | 448 +++++++++++++++++++++++++++-----------------
 1 file changed, 274 insertions(+), 174 deletions(-)

diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c
index 87cd804ed4..7180c3e12c 100644
--- a/source3/python/py_tdbpack.c
+++ b/source3/python/py_tdbpack.c
@@ -27,16 +27,17 @@
 
 #include "Python.h"
 
-static int pytdbpack_calc_reqd_len(char *format_str,
-				   PyObject *val_seq);
+static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
+static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
 
 static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
 
-static PyObject *pytdbpack_pack_data(const char *format_str,
+static PyObject *pytdbpack_data(const char *format_str,
 				     PyObject *val_seq,
-				     unsigned char *buf);
-
+				     PyObject *val_list);
 
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
 
 
 static PyObject *pytdbpack_bad_type(char ch,
@@ -57,13 +58,17 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes.
 Python strings are used to specify the format of data to be packed or
 unpacked.
 
-Strings in TDBs are typically stored in DOS codepages.  The caller of this
-module must make appropriate translations if necessary, typically to and from
-Unicode objects.
+Strings are always stored in codepage 850.  Unicode objects are translated
+to cp850; plain strings are assumed to be in latin-1 and are also
+translated.
+
+This may be a problem in the future if it is different to the Samba codepage.
+It might be better to have the caller do the conversion, but that would conflict
+with existing CMI code.
 
 tdbpack format strings:
 
-    'f':  NULL-terminated string in DOS codepage
+    'f':  NULL-terminated string in codepage 850
 
     'P':  same as 'f'
 
@@ -88,7 +93,7 @@ tdbpack format strings:
 ";
 
 
-static char const pytdbpack_pack_doc[] = 
+static char const pytdbpack_doc[] = 
 "pack(format, values) -> buffer
 Pack Python objects into Samba binary format according to format string.
 
@@ -141,65 +146,274 @@ notes:
 
 
 
+
 /*
-  Game plan is to first of all walk through the arguments and calculate the
-  total length that will be required.  We allocate a Python string of that
-  size, then walk through again and fill it in.
-
-  We just borrow references to all the passed arguments, since none of them
-  need to be permanently stored.  We transfer ownership to the returned
-  object.
- */	
+  * Pack objects to bytes.
+  *
+  * All objects are first individually encoded onto a list, and then the list
+  * of strings is concatenated.  This is faster than concatenating strings,
+  * and reasonably simple to code.
+  */
 static PyObject *
-pytdbpack_pack(PyObject *self,
+pytdbpack(PyObject *self,
 	       PyObject *args)
 {
 	char *format_str;
-	PyObject *val_seq, *fast_seq, *buf_str;
-	int reqd_len;
-	char *packed_buf;
+	PyObject *val_seq, *val_iter = NULL,
+		*packed_list = NULL, *packed_str = NULL,
+		*empty_str = NULL;
 
 	/* TODO: Test passing wrong types or too many arguments */
 	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
 		return NULL;
 
-	/* Convert into a list or tuple (if not already one), so that we can
-	 * index more easily. */
-	fast_seq = PySequence_Fast(val_seq,
-				   __FUNCTION__ ": argument 2 must be sequence");
-	if (!fast_seq)
-		return NULL;
-			
-	reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
-	if (reqd_len == -1)	/* exception was thrown */
-		return NULL;
+	if (!(val_iter = PyObject_GetIter(val_seq)))
+		goto out;
 
-	/* Allocate space.
-	 
-	   This design causes an unnecessary copying of the data when Python
-	   constructs an object, and that might possibly be avoided by using a
-	   Buffer object of some kind instead.  I'm not doing that for now
-	   though.  */
-	packed_buf = malloc(reqd_len);
-	if (!packed_buf) {
-		PyErr_Format(PyExc_MemoryError,
-			     "%s: couldn't allocate %d bytes for packed buffer",
-			     __FUNCTION__, reqd_len);
-		return NULL;
-	}	
+	/* Create list to hold strings until we're done, then join them all. */
+	if (!(packed_list = PyList_New(0)))
+		goto out;
+
+	if (!pytdbpack_data(format_str, val_iter, packed_list))
+		goto out;
+
+	/* _PyString_Join, used below, is not officially documented but it works */
+	if (!(empty_str = PyString_InternFromString("")))
+		goto out;
 	
-	if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
-		free(packed_buf);
-		return NULL;
+	packed_str = _PyString_Join(empty_str, packed_list);
+
+  out:
+	Py_XDECREF(empty_str);
+	Py_XDECREF(val_iter);
+	Py_XDECREF(packed_list);
+
+	return packed_str;
+}
+
+
+/*
+  Pack values pulled from VAL_ITER onto PACKED_LIST, as directed by
+  FORMAT_STR.
+
+  Each format character is dispatched to a helper that takes the value(s)
+  it needs from the iterator and appends the encoded string(s) to
+  PACKED_LIST.  Value types are checked by the helpers, not here.
+
+  Returns PACKED_LIST itself (no new reference is created), or NULL if a
+  helper fails or FORMAT_STR contains an unsupported character (for which
+  ValueError is set).
+*/
+static PyObject *
+pytdbpack_data(const char *format_str,
+		    PyObject *val_iter,
+		    PyObject *packed_list)
+{
+	int format_i;
+
+	for (format_i = 0; format_str[format_i]; format_i++) {
+		char ch = format_str[format_i];
+
+		switch (ch) {
+			/* dispatch to the appropriate packer for this type,
+			   which should pull things off the iterator, and
+			   append them to the packed_list */
+		case 'w':
+		case 'd':
+		case 'p':
+			if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
+				return NULL;
+			break;
+
+		case 'f':
+		case 'P':
+			if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
+				return NULL;
+			break;
+
+		case 'B':
+			if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
+				return NULL;
+			break;
+
+		default:
+			PyErr_Format(PyExc_ValueError,
+				     "%s: format character '%c' is not supported",
+				     __FUNCTION__, ch);
+			return NULL;
+		}
+	}
+
+	return packed_list;
+}
+
+
+/* Take one number from val_iter and append it to packed_list as little-endian
+   bytes: 2 for 'w', otherwise 4.  Returns packed_list, or NULL on error. */
+static PyObject *
+pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
+{
+	unsigned long val_long;
+	PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
+	PyObject *new_list = NULL;
+	unsigned char pack_buf[4];
+
+	if (!(val_obj = PyIter_Next(val_iter))) {
+		if (!PyErr_Occurred())	/* exhausted: PyIter_Next set no error */
+			PyErr_Format(PyExc_IndexError, "%s: not enough values", __FUNCTION__);
+		goto out;
 	}

-	buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
-	free(packed_buf);	/* get rid of tmp buf */
+	long_obj = PyNumber_Long(val_obj);
+	if (!long_obj) {
+		pytdbpack_bad_type(ch, "Number", val_obj);
+		goto out;
+	}
 	
-	return buf_str;
+	val_long = PyLong_AsUnsignedLong(long_obj);
+	if (PyErr_Occurred())	/* negative, or too large for unsigned long */
+		goto out;
+	pack_le_uint32(val_long, pack_buf);
+	result_obj = PyString_FromStringAndSize((char *) pack_buf, ch == 'w' ? 2 : 4);
+	if (result_obj && PyList_Append(packed_list, result_obj) != -1)
+		new_list = packed_list;
+  out:
+	Py_XDECREF(val_obj);
+	Py_XDECREF(long_obj);
+	Py_XDECREF(result_obj);
+	return new_list;
+}
+
+
+/*
+ * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
+ * append it, followed by a single NUL byte, to packed_list.
+ *
+ * If the input is neither a string nor Unicode, an exception is raised.
+ * If the input is Unicode, then it is converted to CP850.
+ * If the input is a String, then it is converted to Unicode using the default
+ * decoding method, and then converted to CP850.  This in effect gives
+ * conversion from latin-1 (currently the PSA's default) to CP850, without
+ * needing a custom translation table.
+ *
+ * I hope this approach avoids being too fragile w.r.t. being passed either
+ * Unicode or String objects.
+ *
+ * Returns packed_list itself, or NULL with an exception set on failure.
+ */
+static PyObject *
+pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
+{
+	/* the NUL terminator is constant and often-used; hold it forever */
+	static PyObject *nul_str = NULL;
+	PyObject *val_obj = NULL;
+	PyObject *unicode_obj = NULL;
+	PyObject *cp850_str = NULL;
+	PyObject *new_list = NULL;
+
+	if (!(val_obj = PyIter_Next(val_iter))) {
+		if (!PyErr_Occurred())	/* exhausted: PyIter_Next set no error */
+			PyErr_Format(PyExc_IndexError, "%s: not enough values", __FUNCTION__);
+		goto out;
+	}
+
+	if (PyUnicode_Check(val_obj)) {
+		/* hand our reference over so it is released exactly once */
+		unicode_obj = val_obj;
+		val_obj = NULL;
+	}
+	else if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
+		goto out;
+
+	if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
+		goto out;
+
+	if (!nul_str && !(nul_str = PyString_FromStringAndSize("", 1)))
+		goto out;
+
+	if ((PyList_Append(packed_list, cp850_str) != -1)
+	    && (PyList_Append(packed_list, nul_str) != -1))
+		new_list = packed_list;
+
+  out:
+	Py_XDECREF(val_obj);
+	Py_XDECREF(unicode_obj);
+	Py_XDECREF(cp850_str);
+
+	return new_list;
 }
 
 
+/*
+ * Pack (LENGTH, BUFFER) pair onto the list.
+ *
+ * The buffer must already be a String, not Unicode, because it contains 8-bit
+ * untranslated data.  In some cases it will actually be UTF_16_LE data.
+ * Returns packed_list itself, or NULL on failure.
+ */
+static PyObject *
+pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
+{
+	PyObject *val_obj = NULL, *new_list = NULL;
+
+	/* pull off integer and stick onto list */
+	if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
+		return NULL;
+
+	/* this assumes that the string is the right length; the old code did the same. */
+	if (!(val_obj = PyIter_Next(val_iter))) {
+		if (!PyErr_Occurred())	/* exhausted: PyIter_Next set no error */
+			PyErr_Format(PyExc_IndexError, "%s: not enough values", __FUNCTION__);
+		return NULL;
+	}
+	if (!PyString_Check(val_obj)) {
+		pytdbpack_bad_type('B', "String", val_obj);
+		goto out;
+	}
+	if (PyList_Append(packed_list, val_obj) != -1)
+		new_list = packed_list;
+  out:
+	Py_XDECREF(val_obj);
+	return new_list;
+}
+
+
+#if 0
+else if (ch == 'B') {
+			long size;
+			char *sval;
+
+			if (!PyNumber_Check(val_obj)) {
+				pytdbpack_bad_type(ch, "Number", val_obj);
+				return NULL;
+			}
+
+			if (!(val_obj = PyNumber_Long(val_obj)))
+				return NULL;
+
+			size = PyLong_AsLong(val_obj);
+			pack_le_uint32(size, &packed);
+
+			/* Release the new reference created by the cast */
+			Py_DECREF(val_obj);
+
+			val_obj = PySequence_GetItem(val_seq, val_i++);
+			if (!val_obj)
+				return NULL;
+			
+			sval = PyString_AsString(val_obj);
+			if (!sval)
+				return NULL;
+			
+			pack_bytes(size, sval, &packed); /* do not include nul */
+		}
+		else {
+		
+	}
+		
+	return Py_None;
+}
+#endif
 
 static PyObject *
 pytdbpack_unpack(PyObject *self,
@@ -270,6 +484,8 @@ pytdbpack_unpack(PyObject *self,
 }
 
 
+
+#if 0
 /*
   Internal routine that calculates how many bytes will be required to
   encode the values in the format.
@@ -361,6 +577,7 @@ pytdbpack_calc_reqd_len(char *format_str,
 
 	return len;
 }
+#endif
 
 
 static PyObject *pytdbpack_bad_type(char ch,
@@ -384,13 +601,12 @@ static PyObject *pytdbpack_bad_type(char ch,
   realize this is kind of dumb because we'll almost always be on x86, but
   being safe is important.
 */
-static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
+static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
 {
-	(*pbuf)[0] =         val_long & 0xff;
-	(*pbuf)[1] = (val_long >> 8)  & 0xff;
-	(*pbuf)[2] = (val_long >> 16) & 0xff;
-	(*pbuf)[3] = (val_long >> 24) & 0xff;
-	(*pbuf) += 4;
+	int byte_i;
+	/* low 32 bits of val_long, least significant byte first */
+	for (byte_i = 0; byte_i < 4; byte_i++)
+		pbuf[byte_i] = (val_long >> (8 * byte_i)) & 0xff;
 }
 
 
@@ -581,126 +797,10 @@ static PyObject *pytdbpack_unpack_item(char ch,
 
 
 
-/*
-  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
-  PACKED_BUF.
-
-  The string has already been checked out, so we know that VAL_SEQ is large
-  enough to hold the packed data, and that there are enough value items.
-  (However, their types may not have been thoroughly checked yet.)
-
-  In addition, val_seq is a Python Fast sequence.
-
-  Returns NULL for error (with exception set), or None.
-*/
-PyObject *
-pytdbpack_pack_data(const char *format_str,
-		    PyObject *val_seq,
-		    unsigned char *packed)
-{
-	int format_i, val_i = 0;
-
-	for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
-		char ch = format_str[format_i];
-		PyObject *val_obj;
-
-		/* borrow a reference to the item */
-		val_obj = PySequence_GetItem(val_seq, val_i++);
-		if (!val_obj)
-			return NULL;
-
-		if (ch == 'w') {
-			unsigned long val_long;
-			PyObject *long_obj;
-			
-			if (!(long_obj = PyNumber_Long(val_obj))) {
-				pytdbpack_bad_type(ch, "Long", val_obj);
-				return NULL;
-			}
-			
-			val_long = PyLong_AsUnsignedLong(long_obj);
-			(packed)[0] = val_long & 0xff;
-			(packed)[1] = (val_long >> 8) & 0xff;
-			(packed) += 2;
-			Py_DECREF(long_obj);
-		}
-		else if (ch == 'd') {
-			/* 4-byte LE number */
-			PyObject *long_obj;
-			
-			if (!(long_obj = PyNumber_Long(val_obj))) {
-				pytdbpack_bad_type(ch, "Long", val_obj);
-				return NULL;
-			}
-			
-			pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
-
-			Py_DECREF(long_obj);
-		}
-		else if (ch == 'p') {
-			/* "Pointer" value -- in the subset of DCERPC used by Samba,
-			   this is really just an "exists" or "does not exist"
-			   flag. */
-			pack_uint32(PyObject_IsTrue(val_obj), &packed);
-		}
-		else if (ch == 'f' || ch == 'P') {
-			int size;
-			char *sval;
-
-			size = PySequence_Length(val_obj);
-			if (size < 0)
-				return NULL;
-			sval = PyString_AsString(val_obj);
-			if (!sval)
-				return NULL;
-			pack_bytes(size+1, sval, &packed); /* include nul */
-		}
-		else if (ch == 'B') {
-			long size;
-			char *sval;
-
-			if (!PyNumber_Check(val_obj)) {
-				pytdbpack_bad_type(ch, "Number", val_obj);
-				return NULL;
-			}
-
-			if (!(val_obj = PyNumber_Long(val_obj)))
-				return NULL;
-
-			size = PyLong_AsLong(val_obj);
-			pack_uint32(size, &packed);
-
-			/* Release the new reference created by the cast */
-			Py_DECREF(val_obj);
-
-			val_obj = PySequence_GetItem(val_seq, val_i++);
-			if (!val_obj)
-				return NULL;
-			
-			sval = PyString_AsString(val_obj);
-			if (!sval)
-				return NULL;
-			
-			pack_bytes(size, sval, &packed); /* do not include nul */
-		}
-		else {
-			/* this ought to be caught while calculating the length, but
-			   just in case. */
-			PyErr_Format(PyExc_ValueError,
-				     "%s: format character '%c' is not supported",
-				     __FUNCTION__, ch);
-		
-			return NULL;
-		}
-	}
-		
-	return Py_None;
-}
-
 
 
 static PyMethodDef pytdbpack_methods[] = {
-	{ "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
+	{ "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
 	{ "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
 };
 
-- 
cgit