summaryrefslogtreecommitdiff
path: root/source3/python/py_tdbpack.c
diff options
context:
space:
mode:
Diffstat (limited to 'source3/python/py_tdbpack.c')
-rw-r--r--source3/python/py_tdbpack.c448
1 files changed, 174 insertions, 274 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c
index 7180c3e12c..87cd804ed4 100644
--- a/source3/python/py_tdbpack.c
+++ b/source3/python/py_tdbpack.c
@@ -27,17 +27,16 @@
#include "Python.h"
-static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
-static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
-static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
+static int pytdbpack_calc_reqd_len(char *format_str,
+ PyObject *val_seq);
static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *);
-static PyObject *pytdbpack_data(const char *format_str,
+static PyObject *pytdbpack_pack_data(const char *format_str,
PyObject *val_seq,
- PyObject *val_list);
+ unsigned char *buf);
+
-static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
static PyObject *pytdbpack_bad_type(char ch,
@@ -58,17 +57,13 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes.
Python strings are used to specify the format of data to be packed or
unpacked.
-Strings are always stored in codepage 850. Unicode objects are translated
-to cp850; plain strings are assumed to be in latin-1 and are also
-translated.
-
-This may be a problem in the future if it is different to the Samba codepage.
-It might be better to have the caller do the conversion, but that would conflict
-with existing CMI code.
+Strings in TDBs are typically stored in DOS codepages. The caller of this
+module must make appropriate translations if necessary, typically to and from
+Unicode objects.
tdbpack format strings:
- 'f': NULL-terminated string in codepage 850
+ 'f': NULL-terminated string in DOS codepage
'P': same as 'f'
@@ -93,7 +88,7 @@ tdbpack format strings:
";
-static char const pytdbpack_doc[] =
+static char const pytdbpack_pack_doc[] =
"pack(format, values) -> buffer
Pack Python objects into Samba binary format according to format string.
@@ -146,274 +141,65 @@ notes:
-
/*
- * Pack objects to bytes.
- *
- * All objects are first individually encoded onto a list, and then the list
- * of strings is concatenated. This is faster than concatenating strings,
- * and reasonably simple to code.
- */
+ Game plan is to first of all walk through the arguments and calculate the
+ total length that will be required. We allocate a Python string of that
+ size, then walk through again and fill it in.
+
+ We just borrow references to all the passed arguments, since none of them
+ need to be permanently stored. We transfer ownership to the returned
+ object.
+ */
static PyObject *
-pytdbpack(PyObject *self,
+pytdbpack_pack(PyObject *self,
PyObject *args)
{
char *format_str;
- PyObject *val_seq, *val_iter = NULL,
- *packed_list = NULL, *packed_str = NULL,
- *empty_str = NULL;
+ PyObject *val_seq, *fast_seq, *buf_str;
+ int reqd_len;
+ char *packed_buf;
/* TODO: Test passing wrong types or too many arguments */
if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
return NULL;
- if (!(val_iter = PyObject_GetIter(val_seq)))
- goto out;
-
- /* Create list to hold strings until we're done, then join them all. */
- if (!(packed_list = PyList_New(0)))
- goto out;
-
- if (!pytdbpack_data(format_str, val_iter, packed_list))
- goto out;
-
- /* this function is not officially documented but it works */
- if (!(empty_str = PyString_InternFromString("")))
- goto out;
-
- packed_str = _PyString_Join(empty_str, packed_list);
-
- out:
- Py_XDECREF(empty_str);
- Py_XDECREF(val_iter);
- Py_XDECREF(packed_list);
-
- return packed_str;
-}
-
-
-/*
- Pack data according to FORMAT_STR from the elements of VAL_SEQ into
- PACKED_BUF.
-
- The string has already been checked out, so we know that VAL_SEQ is large
- enough to hold the packed data, and that there are enough value items.
- (However, their types may not have been thoroughly checked yet.)
-
- In addition, val_seq is a Python Fast sequence.
-
- Returns NULL for error (with exception set), or None.
-*/
-PyObject *
-pytdbpack_data(const char *format_str,
- PyObject *val_iter,
- PyObject *packed_list)
-{
- int format_i, val_i = 0;
-
- for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
- char ch = format_str[format_i];
-
- switch (ch) {
- /* dispatch to the appropriate packer for this type,
- which should pull things off the iterator, and
- append them to the packed_list */
- case 'w':
- case 'd':
- case 'p':
- if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
- return NULL;
- break;
-
- case 'f':
- case 'P':
- if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
- return NULL;
- break;
-
- case 'B':
- if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
- return NULL;
- break;
-
- default:
- PyErr_Format(PyExc_ValueError,
- "%s: format character '%c' is not supported",
- __FUNCTION__, ch);
- return NULL;
- }
- }
-
- return packed_list;
-}
-
-
-static PyObject *
-pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
-{
- unsigned long val_long;
- PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
- PyObject *new_list = NULL;
- unsigned char pack_buf[4];
-
- if (!(val_obj = PyIter_Next(val_iter)))
- goto out;
-
- if (!(long_obj = PyNumber_Long(val_obj))) {
- pytdbpack_bad_type(ch, "Number", val_obj);
- goto out;
- }
-
- val_long = PyLong_AsUnsignedLong(long_obj);
- pack_le_uint32(val_long, pack_buf);
-
- /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
- the first two bytes. */
-
- if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
- goto out;
-
- if (PyList_Append(packed_list, result_obj) != -1)
- new_list = packed_list;
-
- out:
- Py_XDECREF(val_obj);
- Py_XDECREF(long_obj);
- Py_XDECREF(result_obj);
-
- return new_list;
-}
-
-
-/*
- * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
- * return it.
- *
- * If the input is neither a string nor Unicode, an exception is raised.
- *
- * If the input is Unicode, then it is converted to CP850.
- *
- * If the input is a String, then it is converted to Unicode using the default
- * decoding method, and then converted to CP850. This in effect gives
- * conversion from latin-1 (currently the PSA's default) to CP850, without
- * needing a custom translation table.
- *
- * I hope this approach avoids being too fragile w.r.t. being passed either
- * Unicode or String objects.
- */
-static PyObject *
-pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
-{
- PyObject *val_obj = NULL;
- PyObject *unicode_obj = NULL;
- PyObject *cp850_str = NULL;
- PyObject *nul_str = NULL;
- PyObject *new_list = NULL;
-
- if (!(val_obj = PyIter_Next(val_iter)))
- goto out;
-
- if (PyUnicode_Check(val_obj)) {
- unicode_obj = val_obj;
- }
- else {
- /* string */
- if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
- goto out;
- Py_XDECREF(val_obj);
- val_obj = NULL;
- }
-
- if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL)))
- goto out;
-
- if (!nul_str)
- /* this is constant and often-used; hold it forever */
- if (!(nul_str = PyString_FromStringAndSize("", 1)))
- goto out;
-
- if ((PyList_Append(packed_list, cp850_str) != -1)
- && (PyList_Append(packed_list, nul_str) != -1))
- new_list = packed_list;
-
- out:
- Py_XDECREF(unicode_obj);
- Py_XDECREF(cp850_str);
-
- return new_list;
-}
-
-
-/*
- * Pack (LENGTH, BUFFER) pair onto the list.
- *
- * The buffer must already be a String, not Unicode, because it contains 8-bit
- * untranslated data. In some cases it will actually be UTF_16_LE data.
- */
-static PyObject *
-pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
-{
- PyObject *val_obj;
- PyObject *new_list = NULL;
-
- /* pull off integer and stick onto list */
- if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
+ /* Convert into a list or tuple (if not already one), so that we can
+ * index more easily. */
+ fast_seq = PySequence_Fast(val_seq,
+ __FUNCTION__ ": argument 2 must be sequence");
+ if (!fast_seq)
return NULL;
-
- /* this assumes that the string is the right length; the old code did the same. */
- if (!(val_obj = PyIter_Next(val_iter)))
+
+ reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq);
+ if (reqd_len == -1) /* exception was thrown */
return NULL;
- if (!PyString_Check(val_obj)) {
- pytdbpack_bad_type('B', "String", val_obj);
- goto out;
- }
+ /* Allocate space.
+
+ This design causes an unnecessary copying of the data when Python
+ constructs an object, and that might possibly be avoided by using a
+ Buffer object of some kind instead. I'm not doing that for now
+ though. */
+ packed_buf = malloc(reqd_len);
+ if (!packed_buf) {
+ PyErr_Format(PyExc_MemoryError,
+ "%s: couldn't allocate %d bytes for packed buffer",
+ __FUNCTION__, reqd_len);
+ return NULL;
+ }
- if (PyList_Append(packed_list, val_obj) != -1)
- new_list = packed_list;
+ if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) {
+ free(packed_buf);
+ return NULL;
+ }
- out:
- Py_XDECREF(val_obj);
- return new_list;
+ buf_str = PyString_FromStringAndSize(packed_buf, reqd_len);
+ free(packed_buf); /* get rid of tmp buf */
+
+ return buf_str;
}
-#if 0
-else if (ch == 'B') {
- long size;
- char *sval;
-
- if (!PyNumber_Check(val_obj)) {
- pytdbpack_bad_type(ch, "Number", val_obj);
- return NULL;
- }
-
- if (!(val_obj = PyNumber_Long(val_obj)))
- return NULL;
-
- size = PyLong_AsLong(val_obj);
- pack_le_uint32(size, &packed);
-
- /* Release the new reference created by the cast */
- Py_DECREF(val_obj);
-
- val_obj = PySequence_GetItem(val_seq, val_i++);
- if (!val_obj)
- return NULL;
-
- sval = PyString_AsString(val_obj);
- if (!sval)
- return NULL;
-
- pack_bytes(size, sval, &packed); /* do not include nul */
- }
- else {
-
- }
-
- return Py_None;
-}
-#endif
static PyObject *
pytdbpack_unpack(PyObject *self,
@@ -484,8 +270,6 @@ pytdbpack_unpack(PyObject *self,
}
-
-#if 0
/*
Internal routine that calculates how many bytes will be required to
encode the values in the format.
@@ -577,7 +361,6 @@ pytdbpack_calc_reqd_len(char *format_str,
return len;
}
-#endif
static PyObject *pytdbpack_bad_type(char ch,
@@ -601,12 +384,13 @@ static PyObject *pytdbpack_bad_type(char ch,
realize this is kind of dumb because we'll almost always be on x86, but
being safe is important.
*/
-static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
+static void pack_uint32(unsigned long val_long, unsigned char **pbuf)
{
- pbuf[0] = val_long & 0xff;
- pbuf[1] = (val_long >> 8) & 0xff;
- pbuf[2] = (val_long >> 16) & 0xff;
- pbuf[3] = (val_long >> 24) & 0xff;
+ (*pbuf)[0] = val_long & 0xff;
+ (*pbuf)[1] = (val_long >> 8) & 0xff;
+ (*pbuf)[2] = (val_long >> 16) & 0xff;
+ (*pbuf)[3] = (val_long >> 24) & 0xff;
+ (*pbuf) += 4;
}
@@ -797,10 +581,126 @@ static PyObject *pytdbpack_unpack_item(char ch,
+/*
+ Pack data according to FORMAT_STR from the elements of VAL_SEQ into
+ PACKED_BUF.
+
+ The string has already been checked out, so we know that VAL_SEQ is large
+ enough to hold the packed data, and that there are enough value items.
+ (However, their types may not have been thoroughly checked yet.)
+
+ In addition, val_seq is a Python Fast sequence.
+
+ Returns NULL for error (with exception set), or None.
+*/
+PyObject *
+pytdbpack_pack_data(const char *format_str,
+ PyObject *val_seq,
+ unsigned char *packed)
+{
+ int format_i, val_i = 0;
+
+ for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
+ char ch = format_str[format_i];
+ PyObject *val_obj;
+
+ /* borrow a reference to the item */
+ val_obj = PySequence_GetItem(val_seq, val_i++);
+ if (!val_obj)
+ return NULL;
+
+ if (ch == 'w') {
+ unsigned long val_long;
+ PyObject *long_obj;
+
+ if (!(long_obj = PyNumber_Long(val_obj))) {
+ pytdbpack_bad_type(ch, "Long", val_obj);
+ return NULL;
+ }
+
+ val_long = PyLong_AsUnsignedLong(long_obj);
+ (packed)[0] = val_long & 0xff;
+ (packed)[1] = (val_long >> 8) & 0xff;
+ (packed) += 2;
+ Py_DECREF(long_obj);
+ }
+ else if (ch == 'd') {
+ /* 4-byte LE number */
+ PyObject *long_obj;
+
+ if (!(long_obj = PyNumber_Long(val_obj))) {
+ pytdbpack_bad_type(ch, "Long", val_obj);
+ return NULL;
+ }
+
+ pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed);
+
+ Py_DECREF(long_obj);
+ }
+ else if (ch == 'p') {
+ /* "Pointer" value -- in the subset of DCERPC used by Samba,
+ this is really just an "exists" or "does not exist"
+ flag. */
+ pack_uint32(PyObject_IsTrue(val_obj), &packed);
+ }
+ else if (ch == 'f' || ch == 'P') {
+ int size;
+ char *sval;
+
+ size = PySequence_Length(val_obj);
+ if (size < 0)
+ return NULL;
+ sval = PyString_AsString(val_obj);
+ if (!sval)
+ return NULL;
+ pack_bytes(size+1, sval, &packed); /* include nul */
+ }
+ else if (ch == 'B') {
+ long size;
+ char *sval;
+
+ if (!PyNumber_Check(val_obj)) {
+ pytdbpack_bad_type(ch, "Number", val_obj);
+ return NULL;
+ }
+
+ if (!(val_obj = PyNumber_Long(val_obj)))
+ return NULL;
+
+ size = PyLong_AsLong(val_obj);
+ pack_uint32(size, &packed);
+
+ /* Release the new reference created by the cast */
+ Py_DECREF(val_obj);
+
+ val_obj = PySequence_GetItem(val_seq, val_i++);
+ if (!val_obj)
+ return NULL;
+
+ sval = PyString_AsString(val_obj);
+ if (!sval)
+ return NULL;
+
+ pack_bytes(size, sval, &packed); /* do not include nul */
+ }
+ else {
+ /* this ought to be caught while calculating the length, but
+ just in case. */
+ PyErr_Format(PyExc_ValueError,
+ "%s: format character '%c' is not supported",
+ __FUNCTION__, ch);
+
+ return NULL;
+ }
+ }
+
+ return Py_None;
+}
+
static PyMethodDef pytdbpack_methods[] = {
- { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
+ { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc },
{ "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc },
};