diff options
Diffstat (limited to 'source3/python')
-rw-r--r-- | source3/python/py_tdbpack.c | 448 |
1 files changed, 174 insertions, 274 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c index 7180c3e12c..87cd804ed4 100644 --- a/source3/python/py_tdbpack.c +++ b/source3/python/py_tdbpack.c @@ -27,17 +27,16 @@ #include "Python.h" -static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list); -static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list); -static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list); +static int pytdbpack_calc_reqd_len(char *format_str, + PyObject *val_seq); static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *); -static PyObject *pytdbpack_data(const char *format_str, +static PyObject *pytdbpack_pack_data(const char *format_str, PyObject *val_seq, - PyObject *val_list); + unsigned char *buf); + -static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf); static PyObject *pytdbpack_bad_type(char ch, @@ -58,17 +57,13 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes. Python strings are used to specify the format of data to be packed or unpacked. -Strings are always stored in codepage 850. Unicode objects are translated -to cp850; plain strings are assumed to be in latin-1 and are also -translated. - -This may be a problem in the future if it is different to the Samba codepage. -It might be better to have the caller do the conversion, but that would conflict -with existing CMI code. +Strings in TDBs are typically stored in DOS codepages. The caller of this +module must make appropriate translations if necessary, typically to and from +Unicode objects. tdbpack format strings: - 'f': NULL-terminated string in codepage 850 + 'f': NULL-terminated string in DOS codepage 'P': same as 'f' @@ -93,7 +88,7 @@ tdbpack format strings: "; -static char const pytdbpack_doc[] = +static char const pytdbpack_pack_doc[] = "pack(format, values) -> buffer Pack Python objects into Samba binary format according to format string. @@ -146,274 +141,65 @@ notes: - /* - * Pack objects to bytes. - * - * All objects are first individually encoded onto a list, and then the list - * of strings is concatenated. This is faster than concatenating strings, - * and reasonably simple to code. - */ + Game plan is to first of all walk through the arguments and calculate the + total length that will be required. We allocate a Python string of that + size, then walk through again and fill it in. + + We just borrow references to all the passed arguments, since none of them + need to be permanently stored. We transfer ownership to the returned + object. + */ static PyObject * -pytdbpack(PyObject *self, +pytdbpack_pack(PyObject *self, PyObject *args) { char *format_str; - PyObject *val_seq, *val_iter = NULL, - *packed_list = NULL, *packed_str = NULL, - *empty_str = NULL; + PyObject *val_seq, *fast_seq, *buf_str; + int reqd_len; + char *packed_buf; /* TODO: Test passing wrong types or too many arguments */ if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq)) return NULL; - if (!(val_iter = PyObject_GetIter(val_seq))) - goto out; - - /* Create list to hold strings until we're done, then join them all. */ - if (!(packed_list = PyList_New(0))) - goto out; - - if (!pytdbpack_data(format_str, val_iter, packed_list)) - goto out; - - /* this function is not officially documented but it works */ - if (!(empty_str = PyString_InternFromString(""))) - goto out; - - packed_str = _PyString_Join(empty_str, packed_list); - - out: - Py_XDECREF(empty_str); - Py_XDECREF(val_iter); - Py_XDECREF(packed_list); - - return packed_str; -} - - -/* - Pack data according to FORMAT_STR from the elements of VAL_SEQ into - PACKED_BUF. - - The string has already been checked out, so we know that VAL_SEQ is large - enough to hold the packed data, and that there are enough value items. - (However, their types may not have been thoroughly checked yet.) - - In addition, val_seq is a Python Fast sequence. - - Returns NULL for error (with exception set), or None. -*/ -PyObject * -pytdbpack_data(const char *format_str, - PyObject *val_iter, - PyObject *packed_list) -{ - int format_i, val_i = 0; - - for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) { - char ch = format_str[format_i]; - - switch (ch) { - /* dispatch to the appropriate packer for this type, - which should pull things off the iterator, and - append them to the packed_list */ - case 'w': - case 'd': - case 'p': - if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list))) - return NULL; - break; - - case 'f': - case 'P': - if (!(packed_list = pytdbpack_str_850(val_iter, packed_list))) - return NULL; - break; - - case 'B': - if (!(packed_list = pytdbpack_buffer(val_iter, packed_list))) - return NULL; - break; - - default: - PyErr_Format(PyExc_ValueError, - "%s: format character '%c' is not supported", - __FUNCTION__, ch); - return NULL; - } - } - - return packed_list; -} - - -static PyObject * -pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list) -{ - unsigned long val_long; - PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL; - PyObject *new_list = NULL; - unsigned char pack_buf[4]; - - if (!(val_obj = PyIter_Next(val_iter))) - goto out; - - if (!(long_obj = PyNumber_Long(val_obj))) { - pytdbpack_bad_type(ch, "Number", val_obj); - goto out; - } - - val_long = PyLong_AsUnsignedLong(long_obj); - pack_le_uint32(val_long, pack_buf); - - /* pack as 32-bit; if just packing a 'w' 16-bit word then only take - the first two bytes. */ - - if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4))) - goto out; - - if (PyList_Append(packed_list, result_obj) != -1) - new_list = packed_list; - - out: - Py_XDECREF(val_obj); - Py_XDECREF(long_obj); - Py_XDECREF(result_obj); - - return new_list; -} - - -/* - * Take one string from the iterator val_iter, convert it to 8-bit CP850, and - * return it. - * - * If the input is neither a string nor Unicode, an exception is raised. - * - * If the input is Unicode, then it is converted to CP850. - * - * If the input is a String, then it is converted to Unicode using the default - * decoding method, and then converted to CP850. This in effect gives - * conversion from latin-1 (currently the PSA's default) to CP850, without - * needing a custom translation table. - * - * I hope this approach avoids being too fragile w.r.t. being passed either - * Unicode or String objects. - */ -static PyObject * -pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list) -{ - PyObject *val_obj = NULL; - PyObject *unicode_obj = NULL; - PyObject *cp850_str = NULL; - PyObject *nul_str = NULL; - PyObject *new_list = NULL; - - if (!(val_obj = PyIter_Next(val_iter))) - goto out; - - if (PyUnicode_Check(val_obj)) { - unicode_obj = val_obj; - } - else { - /* string */ - if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL))) - goto out; - Py_XDECREF(val_obj); - val_obj = NULL; - } - - if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL))) - goto out; - - if (!nul_str) - /* this is constant and often-used; hold it forever */ - if (!(nul_str = PyString_FromStringAndSize("", 1))) - goto out; - - if ((PyList_Append(packed_list, cp850_str) != -1) - && (PyList_Append(packed_list, nul_str) != -1)) - new_list = packed_list; - - out: - Py_XDECREF(unicode_obj); - Py_XDECREF(cp850_str); - - return new_list; -} - - -/* - * Pack (LENGTH, BUFFER) pair onto the list. - * - * The buffer must already be a String, not Unicode, because it contains 8-bit - * untranslated data. In some cases it will actually be UTF_16_LE data. - */ -static PyObject * -pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list) -{ - PyObject *val_obj; - PyObject *new_list = NULL; - - /* pull off integer and stick onto list */ - if (!(packed_list = pytdbpack_number('d', val_iter, packed_list))) + /* Convert into a list or tuple (if not already one), so that we can + * index more easily. */ + fast_seq = PySequence_Fast(val_seq, + __FUNCTION__ ": argument 2 must be sequence"); + if (!fast_seq) return NULL; - - /* this assumes that the string is the right length; the old code did the same. */ - if (!(val_obj = PyIter_Next(val_iter))) + + reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq); + if (reqd_len == -1) /* exception was thrown */ return NULL; - if (!PyString_Check(val_obj)) { - pytdbpack_bad_type('B', "String", val_obj); - goto out; - } + /* Allocate space. + + This design causes an unnecessary copying of the data when Python + constructs an object, and that might possibly be avoided by using a + Buffer object of some kind instead. I'm not doing that for now + though. */ + packed_buf = malloc(reqd_len); + if (!packed_buf) { + PyErr_Format(PyExc_MemoryError, + "%s: couldn't allocate %d bytes for packed buffer", + __FUNCTION__, reqd_len); + return NULL; + } - if (PyList_Append(packed_list, val_obj) != -1) - new_list = packed_list; + if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) { + free(packed_buf); + return NULL; + } - out: - Py_XDECREF(val_obj); - return new_list; + buf_str = PyString_FromStringAndSize(packed_buf, reqd_len); + free(packed_buf); /* get rid of tmp buf */ + + return buf_str; } -#if 0 -else if (ch == 'B') { - long size; - char *sval; - - if (!PyNumber_Check(val_obj)) { - pytdbpack_bad_type(ch, "Number", val_obj); - return NULL; - } - - if (!(val_obj = PyNumber_Long(val_obj))) - return NULL; - - size = PyLong_AsLong(val_obj); - pack_le_uint32(size, &packed); - - /* Release the new reference created by the cast */ - Py_DECREF(val_obj); - - val_obj = PySequence_GetItem(val_seq, val_i++); - if (!val_obj) - return NULL; - - sval = PyString_AsString(val_obj); - if (!sval) - return NULL; - - pack_bytes(size, sval, &packed); /* do not include nul */ - } - else { - - } - - return Py_None; -} -#endif static PyObject * pytdbpack_unpack(PyObject *self, @@ -484,8 +270,6 @@ pytdbpack_unpack(PyObject *self, } - -#if 0 /* Internal routine that calculates how many bytes will be required to encode the values in the format. @@ -577,7 +361,6 @@ pytdbpack_calc_reqd_len(char *format_str, return len; } -#endif static PyObject *pytdbpack_bad_type(char ch, @@ -601,12 +384,13 @@ static PyObject *pytdbpack_bad_type(char ch, realize this is kind of dumb because we'll almost always be on x86, but being safe is important. */ -static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf) +static void pack_uint32(unsigned long val_long, unsigned char **pbuf) { - pbuf[0] = val_long & 0xff; - pbuf[1] = (val_long >> 8) & 0xff; - pbuf[2] = (val_long >> 16) & 0xff; - pbuf[3] = (val_long >> 24) & 0xff; + (*pbuf)[0] = val_long & 0xff; + (*pbuf)[1] = (val_long >> 8) & 0xff; + (*pbuf)[2] = (val_long >> 16) & 0xff; + (*pbuf)[3] = (val_long >> 24) & 0xff; + (*pbuf) += 4; } @@ -797,10 +581,126 @@ static PyObject *pytdbpack_unpack_item(char ch, +/* + Pack data according to FORMAT_STR from the elements of VAL_SEQ into + PACKED_BUF. + + The string has already been checked out, so we know that VAL_SEQ is large + enough to hold the packed data, and that there are enough value items. + (However, their types may not have been thoroughly checked yet.) + + In addition, val_seq is a Python Fast sequence. + + Returns NULL for error (with exception set), or None. +*/ +PyObject * +pytdbpack_pack_data(const char *format_str, + PyObject *val_seq, + unsigned char *packed) +{ + int format_i, val_i = 0; + + for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) { + char ch = format_str[format_i]; + PyObject *val_obj; + + /* borrow a reference to the item */ + val_obj = PySequence_GetItem(val_seq, val_i++); + if (!val_obj) + return NULL; + + if (ch == 'w') { + unsigned long val_long; + PyObject *long_obj; + + if (!(long_obj = PyNumber_Long(val_obj))) { + pytdbpack_bad_type(ch, "Long", val_obj); + return NULL; + } + + val_long = PyLong_AsUnsignedLong(long_obj); + (packed)[0] = val_long & 0xff; + (packed)[1] = (val_long >> 8) & 0xff; + (packed) += 2; + Py_DECREF(long_obj); + } + else if (ch == 'd') { + /* 4-byte LE number */ + PyObject *long_obj; + + if (!(long_obj = PyNumber_Long(val_obj))) { + pytdbpack_bad_type(ch, "Long", val_obj); + return NULL; + } + + pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed); + + Py_DECREF(long_obj); + } + else if (ch == 'p') { + /* "Pointer" value -- in the subset of DCERPC used by Samba, + this is really just an "exists" or "does not exist" + flag. */ + pack_uint32(PyObject_IsTrue(val_obj), &packed); + } + else if (ch == 'f' || ch == 'P') { + int size; + char *sval; + + size = PySequence_Length(val_obj); + if (size < 0) + return NULL; + sval = PyString_AsString(val_obj); + if (!sval) + return NULL; + pack_bytes(size+1, sval, &packed); /* include nul */ + } + else if (ch == 'B') { + long size; + char *sval; + + if (!PyNumber_Check(val_obj)) { + pytdbpack_bad_type(ch, "Number", val_obj); + return NULL; + } + + if (!(val_obj = PyNumber_Long(val_obj))) + return NULL; + + size = PyLong_AsLong(val_obj); + pack_uint32(size, &packed); + + /* Release the new reference created by the cast */ + Py_DECREF(val_obj); + + val_obj = PySequence_GetItem(val_seq, val_i++); + if (!val_obj) + return NULL; + + sval = PyString_AsString(val_obj); + if (!sval) + return NULL; + + pack_bytes(size, sval, &packed); /* do not include nul */ + } + else { + /* this ought to be caught while calculating the length, but + just in case. */ + PyErr_Format(PyExc_ValueError, + "%s: format character '%c' is not supported", + __FUNCTION__, ch); + + return NULL; + } + } + + return Py_None; +} + static PyMethodDef pytdbpack_methods[] = { - { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc }, + { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc }, { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc }, }; |