summaryrefslogtreecommitdiff
path: root/source3/utils
diff options
context:
space:
mode:
Diffstat (limited to 'source3/utils')
-rw-r--r--source3/utils/make_unicodemap.c308
1 files changed, 308 insertions, 0 deletions
diff --git a/source3/utils/make_unicodemap.c b/source3/utils/make_unicodemap.c
new file mode 100644
index 0000000000..76c49361be
--- /dev/null
+++ b/source3/utils/make_unicodemap.c
@@ -0,0 +1,308 @@
+/*
+ Unix SMB/Netbios implementation.
+ Version 2.0.x.
+ Create unicode map files from unicode_def.XXX files.
+
+ Copyright (C) Jeremy Allison 1997-1999.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#include "includes.h"
+
+static char *prog_name = NULL;
+
+/*
+ * Print program usage and die.
+ */
+
+static void unicode_map_usage(char *progname)
+{
+ fprintf(stderr, "Usage is : %s <codepage> <inputfile> <outputfile>\n",
+ progname);
+ exit(1);
+}
+
+/*
+ * Read a line from a buffer into a line buffer. Ensure null
+ * terminated.
+ */
+
+static void read_line( char **buf, char *line_buf, size_t size)
+{
+ char *p = *buf;
+ size_t num = 0;
+
+ for(; *p && (*p != '\n') && (*p != '\032'); p++) {
+ if(num < (size - 1))
+ line_buf[num++] = *p;
+ }
+ if(*p)
+ p++; /* Go past the '\n' */
+ line_buf[num] = '\0';
+ *buf = p;
+}
+
+/*
+ * Strip comment lines and blank lines from the data.
+ * Copies into a new buffer and frees the old.
+ * Returns the number of lines copied.
+ */
+
+static size_t clean_data( char **buf, size_t *size)
+{
+ pstring linebuf;
+ char *p = *buf;
+ size_t num_lines = 0;
+ char *newbuf = (char *)malloc( *size + 1);
+ char *newbuf_p = NULL;
+
+ if(newbuf == NULL) {
+ fprintf(stderr, "%s: malloc fail for size %u.\n", prog_name, (unsigned int)(*size + 1));
+ exit(1);
+ }
+
+ newbuf_p = newbuf;
+ *newbuf_p = '\0';
+
+ while( *p ) {
+ char *cp;
+
+ read_line( &p, linebuf, sizeof(linebuf));
+ /* Null terminate after comment. */
+ if((cp = strchr( linebuf, '#'))!= NULL)
+ *cp = '\0';
+
+ for(cp = linebuf;*cp && isspace(*cp); cp++)
+ ;
+
+ if(*cp == '\0')
+ continue;
+
+ safe_strcpy(newbuf_p, cp, *size - (newbuf_p - newbuf));
+ num_lines++;
+ newbuf_p += (strlen(newbuf_p) + 1);
+ }
+
+ free(*buf);
+ *buf = newbuf;
+ return num_lines;
+}
+
+/*
+ * Parse a uint16 from a codepage file.
+ */
+
+static BOOL parse_uint16(char *buf, uint16 *uip)
+{
+ unsigned int ui;
+ char *endptr = NULL;
+
+ ui = (unsigned int)strtol(buf, &endptr, 0);
+ if(endptr == buf || ui > 65535)
+ return False;
+
+ *uip = (uint16)ui;
+ return True;
+}
+
+/*
+ * Print a parse error and exit.
+ */
+
+static void parse_error(const char *buf, const char *input_file, const char *msg)
+{
+ fprintf(stderr, "%s: In file %s : %s whilst parsing line \n%s\n", prog_name,
+ input_file, msg, buf);
+ exit(1);
+}
+
+/*
+ * Create a compiled unicode map file from a unicode map definition file.
+ */
+
+static int do_compile(const char *codepage, const char *input_file, const char *output_file)
+{
+ FILE *fp = NULL;
+ size_t size = 0;
+ size_t offset = 0;
+ char *buf = NULL;
+ char *output_buf = NULL;
+ uint16 cp_to_ucs2[65536];
+ uint16 ucs2_to_cp[65536];
+ BOOL multibyte_code_page = False;
+ int num_lines = 0;
+ int i = 0;
+ SMB_STRUCT_STAT st;
+
+ /* Get the size of the input file. Read the entire thing into memory. */
+ if(sys_stat((char *)input_file, &st)!= 0) {
+ fprintf(stderr, "%s: failed to get the file size for file %s. Error was %s\n",
+ prog_name, input_file, strerror(errno));
+ exit(1);
+ }
+
+ size = (size_t)st.st_size;
+
+ if((fp = sys_fopen(input_file, "r")) == NULL) {
+ fprintf(stderr, "%s: cannot open file %s for input.\n", prog_name, input_file);
+ exit(1);
+ }
+
+ /* As we will be reading text, allocate one more byte for a '\0' */
+ if((buf = (char *)malloc( size + 1 )) == NULL) {
+ fprintf(stderr, "%s: malloc fail for size %d.\n", prog_name, size + 1);
+ fclose(fp);
+ exit(1);
+ }
+
+ if(fread( buf, 1, size, fp) != size) {
+ fprintf(stderr, "%s: read failed for file %s. Error was %s.\n", prog_name,
+ input_file, strerror(errno));
+ free((char *)buf);
+ fclose(fp);
+ exit(1);
+ }
+
+ /* Null terminate the text read. */
+ buf[size] = '\0';
+
+ /* Go through the data line by line, strip out comments (anything
+ after a '#' to end-of-line) and blank lines. The rest should be
+ the codepage data.
+ */
+
+ num_lines = clean_data( &buf, &size);
+
+ /*
+ * Initialize the output data.
+ */
+
+ memset(cp_to_ucs2, '\0', sizeof(cp_to_ucs2));
+ ucs2_to_cp[0] = 0;
+ for (i = 1; i < 65536; i++)
+ ucs2_to_cp[i] = (uint16)'_';
+
+ /* Now convert the lines into the compiled form. */
+
+ for(i = 0; i < num_lines; i++) {
+ char token_buf[512];
+ char *p = buf;
+ uint16 cp = 0;
+ uint16 ucs2 = 0;
+
+ /* Get the codepage value. */
+ if(!next_token(&p, token_buf, NULL, sizeof(token_buf)))
+ parse_error(buf, input_file, "cannot parse first value");
+
+ if(!parse_uint16( token_buf, &cp))
+ parse_error(buf, input_file, "first value doesn't resolve to an unsigned 16 bit integer");
+
+ if(cp > 255)
+ multibyte_code_page = True;
+
+ /* Get the ucs2 value. */
+
+ if(!next_token(&p, token_buf, NULL, sizeof(token_buf))) {
+
+ /*
+ * Some of the multibyte codepage to unicode map files
+ * list a single byte as a leading multibyte and have no
+ * second value.
+ */
+
+ buf += (strlen(buf) + 1);
+ continue;
+ }
+
+ if(!parse_uint16( token_buf, &ucs2))
+ parse_error(buf, input_file, "second value doesn't resolve to an unsigned 16 bit integer");
+
+ /*
+ * Set up the cross reference in little-endian format.
+ */
+
+ SSVAL(((char *)&cp_to_ucs2[cp]),0,ucs2);
+ SSVAL(((char *)&ucs2_to_cp[ucs2]),0,cp);
+
+ /*
+ * Next line.
+ */
+ buf += (strlen(buf) + 1);
+ }
+
+ size = UNICODE_MAP_HEADER_SIZE + (multibyte_code_page ? (4*65536) : (2*256 + 2*65536));
+
+ if((output_buf = (char *)malloc( size )) == NULL) {
+ fprintf(stderr, "%s: output buffer malloc fail for size %d.\n", prog_name, size);
+ fclose(fp);
+ exit(1);
+ }
+
+ /* Setup the output file header. */
+ SSVAL(output_buf,UNICODE_MAP_VERSION_OFFSET,UNICODE_MAP_FILE_VERSION_ID);
+ memset(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET],'\0',UNICODE_MAP_CODEPAGE_ID_SIZE);
+ safe_strcpy(&output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET], codepage, UNICODE_MAP_CODEPAGE_ID_SIZE - 1);
+ output_buf[UNICODE_MAP_CLIENT_CODEPAGE_OFFSET+UNICODE_MAP_CODEPAGE_ID_SIZE-1] = '\0';
+
+ offset = UNICODE_MAP_HEADER_SIZE;
+
+ if (multibyte_code_page) {
+ SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*65536);
+ memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*65536);
+ offset += 2*65536;
+ } else {
+ SIVAL(output_buf,UNICODE_MAP_CP_TO_UNICODE_LENGTH_OFFSET,2*256);
+ memcpy(output_buf+offset, (char *)cp_to_ucs2, 2*256);
+ offset += 2*256;
+ }
+ SIVAL(output_buf,UNICODE_MAP_UNICODE_TO_CP_LENGTH_OFFSET,65536*2);
+ memcpy(output_buf+offset, (char *)ucs2_to_cp, 2*65536);
+
+ /* Now write out the output_buf. */
+ if((fp = sys_fopen(output_file, "w"))==NULL) {
+ fprintf(stderr, "%s: Cannot open output file %s. Error was %s.\n",
+ prog_name, output_file, strerror(errno));
+ exit(1);
+ }
+
+ if(fwrite(output_buf, 1, size, fp) != size) {
+ fprintf(stderr, "%s: Cannot write output file %s. Error was %s.\n",
+ prog_name, output_file, strerror(errno));
+ exit(1);
+ }
+
+ fclose(fp);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *codepage = NULL;
+ char *input_file = NULL;
+ char *output_file = NULL;
+
+ prog_name = argv[0];
+
+ if(argc != 4)
+ unicode_map_usage(prog_name);
+
+ codepage = argv[1];
+ input_file = argv[2];
+ output_file = argv[3];
+
+ return do_compile( codepage, input_file, output_file);
+}