diff options
author | Martin Pool <mbp@samba.org> | 2002-11-09 01:47:54 +0000 |
---|---|---|
committer | Martin Pool <mbp@samba.org> | 2002-11-09 01:47:54 +0000 |
commit | 5fabbb0573095d05695ab4904683f6939d6d9e37 (patch) | |
tree | 60410d5079ea27cd42a8a65c389d2dad29a2f9c7 /source3 | |
parent | cc9b6ef9cadf31b576609308d24657e921c644d1 (diff) | |
download | samba-5fabbb0573095d05695ab4904683f6939d6d9e37.tar.gz samba-5fabbb0573095d05695ab4904683f6939d6d9e37.tar.bz2 samba-5fabbb0573095d05695ab4904683f6939d6d9e37.zip |
Rewrite all the packing stuff so that it converts 'f' (and 'P') strings to
CP850 on encoding. (Tough luck if your Samba codepage is not 850.) It's
much cleaner and possibly more efficient now too: just build one big
list of strings, then join it into a single string at the end.
Still need to do the same for unpacking.
(This used to be commit 2480d385db0527b71cc5cd23c4fab4655df84485)
Diffstat (limited to 'source3')
-rw-r--r-- | source3/python/py_tdbpack.c | 448 |
1 files changed, 274 insertions, 174 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c index 87cd804ed4..7180c3e12c 100644 --- a/source3/python/py_tdbpack.c +++ b/source3/python/py_tdbpack.c @@ -27,16 +27,17 @@ #include "Python.h" -static int pytdbpack_calc_reqd_len(char *format_str, - PyObject *val_seq); +static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list); +static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list); +static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list); static PyObject *pytdbpack_unpack_item(char, char **pbuf, int *plen, PyObject *); -static PyObject *pytdbpack_pack_data(const char *format_str, +static PyObject *pytdbpack_data(const char *format_str, PyObject *val_seq, - unsigned char *buf); - + PyObject *val_list); +static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf); static PyObject *pytdbpack_bad_type(char ch, @@ -57,13 +58,17 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes. Python strings are used to specify the format of data to be packed or unpacked. -Strings in TDBs are typically stored in DOS codepages. The caller of this -module must make appropriate translations if necessary, typically to and from -Unicode objects. +Strings are always stored in codepage 850. Unicode objects are translated +to cp850; plain strings are assumed to be in latin-1 and are also +translated. + +This may be a problem in the future if it is different to the Samba codepage. +It might be better to have the caller do the conversion, but that would conflict +with existing CMI code. tdbpack format strings: - 'f': NULL-terminated string in DOS codepage + 'f': NULL-terminated string in codepage 850 'P': same as 'f' @@ -88,7 +93,7 @@ tdbpack format strings: "; -static char const pytdbpack_pack_doc[] = +static char const pytdbpack_doc[] = "pack(format, values) -> buffer Pack Python objects into Samba binary format according to format string. 
@@ -141,65 +146,274 @@ notes: + /* - Game plan is to first of all walk through the arguments and calculate the - total length that will be required. We allocate a Python string of that - size, then walk through again and fill it in. - - We just borrow references to all the passed arguments, since none of them - need to be permanently stored. We transfer ownership to the returned - object. - */ + * Pack objects to bytes. + * + * All objects are first individually encoded onto a list, and then the list + * of strings is concatenated. This is faster than concatenating strings, + * and reasonably simple to code. + */ static PyObject * -pytdbpack_pack(PyObject *self, +pytdbpack(PyObject *self, PyObject *args) { char *format_str; - PyObject *val_seq, *fast_seq, *buf_str; - int reqd_len; - char *packed_buf; + PyObject *val_seq, *val_iter = NULL, + *packed_list = NULL, *packed_str = NULL, + *empty_str = NULL; /* TODO: Test passing wrong types or too many arguments */ if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq)) return NULL; - /* Convert into a list or tuple (if not already one), so that we can - * index more easily. */ - fast_seq = PySequence_Fast(val_seq, - __FUNCTION__ ": argument 2 must be sequence"); - if (!fast_seq) - return NULL; - - reqd_len = pytdbpack_calc_reqd_len(format_str, fast_seq); - if (reqd_len == -1) /* exception was thrown */ - return NULL; + if (!(val_iter = PyObject_GetIter(val_seq))) + goto out; - /* Allocate space. - - This design causes an unnecessary copying of the data when Python - constructs an object, and that might possibly be avoided by using a - Buffer object of some kind instead. I'm not doing that for now - though. */ - packed_buf = malloc(reqd_len); - if (!packed_buf) { - PyErr_Format(PyExc_MemoryError, - "%s: couldn't allocate %d bytes for packed buffer", - __FUNCTION__, reqd_len); - return NULL; - } + /* Create list to hold strings until we're done, then join them all. 
*/ + if (!(packed_list = PyList_New(0))) + goto out; + + if (!pytdbpack_data(format_str, val_iter, packed_list)) + goto out; + + /* this function is not officially documented but it works */ + if (!(empty_str = PyString_InternFromString(""))) + goto out; - if (!pytdbpack_pack_data(format_str, fast_seq, packed_buf)) { - free(packed_buf); - return NULL; + packed_str = _PyString_Join(empty_str, packed_list); + + out: + Py_XDECREF(empty_str); + Py_XDECREF(val_iter); + Py_XDECREF(packed_list); + + return packed_str; +} + + +/* + Pack data according to FORMAT_STR from the elements of VAL_SEQ into + PACKED_BUF. + + The string has already been checked out, so we know that VAL_SEQ is large + enough to hold the packed data, and that there are enough value items. + (However, their types may not have been thoroughly checked yet.) + + In addition, val_seq is a Python Fast sequence. + + Returns NULL for error (with exception set), or None. +*/ +PyObject * +pytdbpack_data(const char *format_str, + PyObject *val_iter, + PyObject *packed_list) +{ + int format_i, val_i = 0; + + for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) { + char ch = format_str[format_i]; + + switch (ch) { + /* dispatch to the appropriate packer for this type, + which should pull things off the iterator, and + append them to the packed_list */ + case 'w': + case 'd': + case 'p': + if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list))) + return NULL; + break; + + case 'f': + case 'P': + if (!(packed_list = pytdbpack_str_850(val_iter, packed_list))) + return NULL; + break; + + case 'B': + if (!(packed_list = pytdbpack_buffer(val_iter, packed_list))) + return NULL; + break; + + default: + PyErr_Format(PyExc_ValueError, + "%s: format character '%c' is not supported", + __FUNCTION__, ch); + return NULL; + } + } + + return packed_list; +} + + +static PyObject * +pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list) +{ + unsigned long val_long; + PyObject *val_obj = NULL, 
*long_obj = NULL, *result_obj = NULL; + PyObject *new_list = NULL; + unsigned char pack_buf[4]; + + if (!(val_obj = PyIter_Next(val_iter))) + goto out; + + if (!(long_obj = PyNumber_Long(val_obj))) { + pytdbpack_bad_type(ch, "Number", val_obj); + goto out; } - buf_str = PyString_FromStringAndSize(packed_buf, reqd_len); - free(packed_buf); /* get rid of tmp buf */ + val_long = PyLong_AsUnsignedLong(long_obj); + pack_le_uint32(val_long, pack_buf); + + /* pack as 32-bit; if just packing a 'w' 16-bit word then only take + the first two bytes. */ - return buf_str; + if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4))) + goto out; + + if (PyList_Append(packed_list, result_obj) != -1) + new_list = packed_list; + + out: + Py_XDECREF(val_obj); + Py_XDECREF(long_obj); + Py_XDECREF(result_obj); + + return new_list; +} + + +/* + * Take one string from the iterator val_iter, convert it to 8-bit CP850, and + * return it. + * + * If the input is neither a string nor Unicode, an exception is raised. + * + * If the input is Unicode, then it is converted to CP850. + * + * If the input is a String, then it is converted to Unicode using the default + * decoding method, and then converted to CP850. This in effect gives + * conversion from latin-1 (currently the PSA's default) to CP850, without + * needing a custom translation table. + * + * I hope this approach avoids being too fragile w.r.t. being passed either + * Unicode or String objects. 
+ */ +static PyObject * +pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list) +{ + PyObject *val_obj = NULL; + PyObject *unicode_obj = NULL; + PyObject *cp850_str = NULL; + PyObject *nul_str = NULL; + PyObject *new_list = NULL; + + if (!(val_obj = PyIter_Next(val_iter))) + goto out; + + if (PyUnicode_Check(val_obj)) { + unicode_obj = val_obj; + } + else { + /* string */ + if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL))) + goto out; + Py_XDECREF(val_obj); + val_obj = NULL; + } + + if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, "cp850", NULL))) + goto out; + + if (!nul_str) + /* this is constant and often-used; hold it forever */ + if (!(nul_str = PyString_FromStringAndSize("", 1))) + goto out; + + if ((PyList_Append(packed_list, cp850_str) != -1) + && (PyList_Append(packed_list, nul_str) != -1)) + new_list = packed_list; + + out: + Py_XDECREF(unicode_obj); + Py_XDECREF(cp850_str); + + return new_list; } +/* + * Pack (LENGTH, BUFFER) pair onto the list. + * + * The buffer must already be a String, not Unicode, because it contains 8-bit + * untranslated data. In some cases it will actually be UTF_16_LE data. + */ +static PyObject * +pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list) +{ + PyObject *val_obj; + PyObject *new_list = NULL; + + /* pull off integer and stick onto list */ + if (!(packed_list = pytdbpack_number('d', val_iter, packed_list))) + return NULL; + + /* this assumes that the string is the right length; the old code did the same. 
*/ + if (!(val_obj = PyIter_Next(val_iter))) + return NULL; + + if (!PyString_Check(val_obj)) { + pytdbpack_bad_type('B', "String", val_obj); + goto out; + } + + if (PyList_Append(packed_list, val_obj) != -1) + new_list = packed_list; + + out: + Py_XDECREF(val_obj); + return new_list; +} + + +#if 0 +else if (ch == 'B') { + long size; + char *sval; + + if (!PyNumber_Check(val_obj)) { + pytdbpack_bad_type(ch, "Number", val_obj); + return NULL; + } + + if (!(val_obj = PyNumber_Long(val_obj))) + return NULL; + + size = PyLong_AsLong(val_obj); + pack_le_uint32(size, &packed); + + /* Release the new reference created by the cast */ + Py_DECREF(val_obj); + + val_obj = PySequence_GetItem(val_seq, val_i++); + if (!val_obj) + return NULL; + + sval = PyString_AsString(val_obj); + if (!sval) + return NULL; + + pack_bytes(size, sval, &packed); /* do not include nul */ + } + else { + + } + + return Py_None; +} +#endif static PyObject * pytdbpack_unpack(PyObject *self, @@ -270,6 +484,8 @@ pytdbpack_unpack(PyObject *self, } + +#if 0 /* Internal routine that calculates how many bytes will be required to encode the values in the format. @@ -361,6 +577,7 @@ pytdbpack_calc_reqd_len(char *format_str, return len; } +#endif static PyObject *pytdbpack_bad_type(char ch, @@ -384,13 +601,12 @@ static PyObject *pytdbpack_bad_type(char ch, realize this is kind of dumb because we'll almost always be on x86, but being safe is important. 
*/ -static void pack_uint32(unsigned long val_long, unsigned char **pbuf) +static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf) { - (*pbuf)[0] = val_long & 0xff; - (*pbuf)[1] = (val_long >> 8) & 0xff; - (*pbuf)[2] = (val_long >> 16) & 0xff; - (*pbuf)[3] = (val_long >> 24) & 0xff; - (*pbuf) += 4; + pbuf[0] = val_long & 0xff; + pbuf[1] = (val_long >> 8) & 0xff; + pbuf[2] = (val_long >> 16) & 0xff; + pbuf[3] = (val_long >> 24) & 0xff; } @@ -581,126 +797,10 @@ static PyObject *pytdbpack_unpack_item(char ch, -/* - Pack data according to FORMAT_STR from the elements of VAL_SEQ into - PACKED_BUF. - - The string has already been checked out, so we know that VAL_SEQ is large - enough to hold the packed data, and that there are enough value items. - (However, their types may not have been thoroughly checked yet.) - - In addition, val_seq is a Python Fast sequence. - - Returns NULL for error (with exception set), or None. -*/ -PyObject * -pytdbpack_pack_data(const char *format_str, - PyObject *val_seq, - unsigned char *packed) -{ - int format_i, val_i = 0; - - for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) { - char ch = format_str[format_i]; - PyObject *val_obj; - - /* borrow a reference to the item */ - val_obj = PySequence_GetItem(val_seq, val_i++); - if (!val_obj) - return NULL; - - if (ch == 'w') { - unsigned long val_long; - PyObject *long_obj; - - if (!(long_obj = PyNumber_Long(val_obj))) { - pytdbpack_bad_type(ch, "Long", val_obj); - return NULL; - } - - val_long = PyLong_AsUnsignedLong(long_obj); - (packed)[0] = val_long & 0xff; - (packed)[1] = (val_long >> 8) & 0xff; - (packed) += 2; - Py_DECREF(long_obj); - } - else if (ch == 'd') { - /* 4-byte LE number */ - PyObject *long_obj; - - if (!(long_obj = PyNumber_Long(val_obj))) { - pytdbpack_bad_type(ch, "Long", val_obj); - return NULL; - } - - pack_uint32(PyLong_AsUnsignedLong(long_obj), &packed); - - Py_DECREF(long_obj); - } - else if (ch == 'p') { - /* "Pointer" value -- in the 
subset of DCERPC used by Samba, - this is really just an "exists" or "does not exist" - flag. */ - pack_uint32(PyObject_IsTrue(val_obj), &packed); - } - else if (ch == 'f' || ch == 'P') { - int size; - char *sval; - - size = PySequence_Length(val_obj); - if (size < 0) - return NULL; - sval = PyString_AsString(val_obj); - if (!sval) - return NULL; - pack_bytes(size+1, sval, &packed); /* include nul */ - } - else if (ch == 'B') { - long size; - char *sval; - - if (!PyNumber_Check(val_obj)) { - pytdbpack_bad_type(ch, "Number", val_obj); - return NULL; - } - - if (!(val_obj = PyNumber_Long(val_obj))) - return NULL; - - size = PyLong_AsLong(val_obj); - pack_uint32(size, &packed); - - /* Release the new reference created by the cast */ - Py_DECREF(val_obj); - - val_obj = PySequence_GetItem(val_seq, val_i++); - if (!val_obj) - return NULL; - - sval = PyString_AsString(val_obj); - if (!sval) - return NULL; - - pack_bytes(size, sval, &packed); /* do not include nul */ - } - else { - /* this ought to be caught while calculating the length, but - just in case. */ - PyErr_Format(PyExc_ValueError, - "%s: format character '%c' is not supported", - __FUNCTION__, ch); - - return NULL; - } - } - - return Py_None; -} - static PyMethodDef pytdbpack_methods[] = { - { "pack", pytdbpack_pack, METH_VARARGS, (char *) pytdbpack_pack_doc }, + { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc }, { "unpack", pytdbpack_unpack, METH_VARARGS, (char *) pytdbpack_unpack_doc }, }; |