1 files changed, 34 insertions, 64 deletions
diff --git a/source3/python/py_tdbpack.c b/source3/python/py_tdbpack.c
index 7a03f830ee..d8c3d46773 100644
--- a/source3/python/py_tdbpack.c
+++ b/source3/python/py_tdbpack.c
@@ -28,9 +28,7 @@
 #include "Python.h"
 
 static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
-static PyObject * pytdbpack_str(char ch,
-				PyObject *val_iter, PyObject *packed_list,
-				const char *encoding);
+static PyObject * pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list);
 static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
 
 static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
@@ -39,9 +37,6 @@ static PyObject *pytdbpack_data(const char *format_str,
 				     PyObject *val_seq,
 				     PyObject *val_list);
 
-static PyObject *
-pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
-
 static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
 
 
@@ -63,17 +58,19 @@ tdb/tdbutil module, with appropriate adjustments for Python datatypes.
 Python strings are used to specify the format of data to be packed or
 unpacked.
 
-String encodings are implied by the database format: they may be either DOS
-codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded
-to be the same as the default Python encoding).
+Strings are always stored in codepage 850.  Unicode objects are translated
+to cp850; plain strings are assumed to be in latin-1 and are also
+translated.
+
+This may become a problem if cp850 ever differs from the Samba codepage.
+It might be better to have the caller do the conversion, but that would conflict
+with existing CMI code.
 
 tdbpack format strings:
 
-    'f': NUL-terminated string in codepage 850
-   
-    'P': same as 'f'
+    'f':  NUL-terminated string in codepage 850
 
-    'F': NUL-terminated string in iso-8859-1
+    'P':  same as 'f'
 
     'd':  4 byte little-endian unsigned number
 
@@ -148,11 +145,7 @@ notes:
 ";
 
 
-const char *pytdb_dos_encoding = "cp850";
-
-/* NULL, meaning that the Samba default encoding *must* be the same as the
-   Python default encoding. */
-const char *pytdb_unix_encoding = NULL;
+const char *pytdb_string_encoding = "cp850";
 
 
 /*
@@ -235,14 +228,7 @@ pytdbpack_data(const char *format_str,
 
 		case 'f':
 		case 'P':
-			if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_dos_encoding)))
-				return NULL;
-			break;
-
-		case 'F':
-			/* We specify NULL encoding: Samba databases in this
-			   form are written in the default Python encoding. */
-			if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
+			if (!(packed_list = pytdbpack_str_850(val_iter, packed_list)))
 				return NULL;
 			break;
 
@@ -301,29 +287,27 @@ pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
 
 
 /*
- * Take one string from the iterator val_iter, convert it to 8-bit, and return
- * it.
+ * Take one string from the iterator val_iter, convert it to 8-bit CP850, and
+ * return it.
  *
  * If the input is neither a string nor Unicode, an exception is raised.
  *
- * If the input is Unicode, then it is converted to the appropriate encoding.
+ * If the input is Unicode, then it is converted to CP850.
  *
- * If the input is a String, and encoding is not null, then it is converted to
- * Unicode using the default decoding method, and then converted to the
- * encoding.  If the encoding is NULL, then the string is written out as-is --
- * this is used when the default Python encoding is the same as the Samba
- * encoding.
+ * If the input is a String, then it is converted to Unicode using the default
+ * decoding method, and then converted to CP850.  This in effect gives
+ * conversion from latin-1 (currently the PSA's default) to CP850, without
+ * needing a custom translation table.
  *
  * I hope this approach avoids being too fragile w.r.t. being passed either
  * Unicode or String objects.
  */
 static PyObject *
-pytdbpack_str(char ch,
-	      PyObject *val_iter, PyObject *packed_list, const char *encoding)
+pytdbpack_str_850(PyObject *val_iter, PyObject *packed_list)
 {
 	PyObject *val_obj = NULL;
 	PyObject *unicode_obj = NULL;
-	PyObject *coded_str = NULL;
+	PyObject *cp850_str = NULL;
 	PyObject *nul_str = NULL;
 	PyObject *new_list = NULL;
 
@@ -331,41 +315,31 @@ pytdbpack_str(char ch,
 		goto out;
 
 	if (PyUnicode_Check(val_obj)) {
-		if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
-			goto out;
+		unicode_obj = val_obj;
 	}
-	else if (PyString_Check(val_obj) && !encoding) {
-		/* For efficiency, we assume that the Python interpreter has
-		   the same default string encoding as Samba's native string
-		   encoding.  On the PSA, both are always 8859-1. */
-		coded_str = val_obj;
-		Py_INCREF(coded_str);
-	}
-	else if (PyString_Check(val_obj)) {
-		/* String, but needs to be converted */
+	else {
+		/* string */
 		if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
 			goto out;
-		if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
-			goto out;
+		Py_XDECREF(val_obj);
+		val_obj = NULL;
 	}
-	else {
-		pytdbpack_bad_type(ch, "String or Unicode", val_obj);
+
+	if (!(cp850_str = PyUnicode_AsEncodedString(unicode_obj, pytdb_string_encoding, NULL)))
 		goto out;
-	}
 
 	if (!nul_str)
 		/* this is constant and often-used; hold it forever */
 		if (!(nul_str = PyString_FromStringAndSize("", 1)))
 			goto out;
 
-	if ((PyList_Append(packed_list, coded_str) != -1)
+	if ((PyList_Append(packed_list, cp850_str) != -1)
 	    && (PyList_Append(packed_list, nul_str) != -1))
 		new_list = packed_list;
 
   out:
-	Py_XDECREF(val_obj);
 	Py_XDECREF(unicode_obj);
-	Py_XDECREF(coded_str);
+	Py_XDECREF(cp850_str);
 
 	return new_list;
 }
@@ -387,8 +361,7 @@ pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
 	if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
 		return NULL;
 
-	/* this assumes that the string is the right length; the old code did
-	   the same. */
+	/* this assumes that the string is the right length; the old code did the same. */
 	if (!(val_obj = PyIter_Next(val_iter)))
 		return NULL;
 
@@ -564,7 +537,7 @@ static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
 
 
 static PyObject *
-pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
+pytdbunpack_string(char **pbuf, int *plen)
 {
 	int len;
 	char *nul_ptr, *start;
@@ -582,7 +555,7 @@ pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
 	*pbuf += len + 1;	/* skip \0 */
 	*plen -= len + 1;
 
-	return PyString_Decode(start, len, encoding, NULL);
+	return PyString_Decode(start, len, pytdb_string_encoding, NULL);
 }
 
 
@@ -668,10 +641,7 @@ static PyObject *pytdbunpack_item(char ch,
 		result = pytdbunpack_uint32(pbuf, plen);
 	}
 	else if (ch == 'f' || ch == 'P') { /* nul-term string  */
-		result = pytdbunpack_string(pbuf, plen, pytdb_dos_encoding);
-	}
-	else if (ch == 'F') { /* nul-term string  */
-		result = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
+		result = pytdbunpack_string(pbuf, plen);
 	}
 	else if (ch == 'B') { /* length, buffer */
 		return pytdbunpack_buffer(pbuf, plen, val_list);