diff options
Diffstat (limited to 'source4/lib/appweb/ejs-2.0/exml')
-rw-r--r-- | source4/lib/appweb/ejs-2.0/exml/Makefile | 42 | ||||
-rw-r--r-- | source4/lib/appweb/ejs-2.0/exml/exml.h | 94 | ||||
-rw-r--r-- | source4/lib/appweb/ejs-2.0/exml/exmlParser.c | 752 | ||||
-rw-r--r-- | source4/lib/appweb/ejs-2.0/exml/files | 1 |
4 files changed, 889 insertions, 0 deletions
diff --git a/source4/lib/appweb/ejs-2.0/exml/Makefile b/source4/lib/appweb/ejs-2.0/exml/Makefile new file mode 100644 index 0000000000..663e65ed53 --- /dev/null +++ b/source4/lib/appweb/ejs-2.0/exml/Makefile @@ -0,0 +1,42 @@ +# +# Makefile for Embedded XML (EXML) +# +# Copyright (c) Mbedthis Software LLC, 2003-2006. All Rights Reserved. +# + +# +# EXML may be linked into shared handlers so we must build the objects both +# shared and static. +# +COMPILE := *.c +EXPORT_OBJECTS := yes +MAKE_IFLAGS := -I../mpr + +include make.dep + +ifeq ($(BLD_FEATURE_TEST),1) +POST_DIRS := test +endif + +TARGETS += $(BLD_BIN_DIR)/libexml$(BLD_LIB) + +ifeq ($(BLD_FEATURE_XML),1) +compileExtra: $(TARGETS) +endif + +# MOB -- remove when FEATURE_XML is defined +compileExtra: $(TARGETS) + +$(BLD_BIN_DIR)/libexml$(BLD_LIB): $(FILES) + @bld --library $(BLD_BIN_DIR)/libexml \ + --objectsDir $(BLD_OBJ_DIR) --objectList files --libs mpr + +cleanExtra: + @echo "rm -f $(TARGETS)" | $(BLDOUT) + @rm -f $(TARGETS) + @rm -f $(BLD_BIN_DIR)/libexml.* + +## Local variables: +## tab-width: 4 +## End: +## vim: tw=78 sw=4 ts=4 diff --git a/source4/lib/appweb/ejs-2.0/exml/exml.h b/source4/lib/appweb/ejs-2.0/exml/exml.h new file mode 100644 index 0000000000..44c50a56b9 --- /dev/null +++ b/source4/lib/appweb/ejs-2.0/exml/exml.h @@ -0,0 +1,94 @@ +/* + * exml.h -- Embedded Xml Parser header + * + * Copyright (c) Mbedthis Software, LLC, 2003-2003. All Rights Reserved. -- MOB + */ + +#ifndef _h_EXML +#define _h_EXML 1 + +/******************************** Description *********************************/ + +#include "mpr.h" + +/********************************** Defines ***********************************/ + +#if BLD_FEATURE_SQUEEZE + #define EXML_BUFSIZE 512 /* Read buffer size */ +#else + #define EXML_BUFSIZE 1024 /* Read buffer size */ +#endif + +/* + * XML parser states. The states that are passed to the user handler have + * "U" appended to the comment. The error states (ERR and EOF) must be + * negative. 
+ */ +#define EXML_ERR -1 /* Error */ +#define EXML_EOF -2 /* End of input */ +#define EXML_BEGIN 1 /* Before next tag */ +#define EXML_AFTER_LS 2 /* Seen "<" */ +#define EXML_COMMENT 3 /* Seen "<!--" (usr) U */ +#define EXML_NEW_ELT 4 /* Seen "<tag" (usr) U */ +#define EXML_ATT_NAME 5 /* Seen "<tag att" */ +#define EXML_ATT_EQ 6 /* Seen "<tag att =" */ +#define EXML_NEW_ATT 7 /* Seen "<tag att = "val"" U */ +#define EXML_SOLO_ELT_DEFINED 8 /* Seen "<tag../>" U */ +#define EXML_ELT_DEFINED 9 /* Seen "<tag...>" U */ +#define EXML_ELT_DATA 10 /* Seen "<tag>....<" U */ +#define EXML_END_ELT 11 /* Seen "<tag>....</tag>" U */ +#define EXML_PI 12 /* Seen "<?processingInst" (parser currently skips these) */ +#define EXML_CDATA 13 /* Seen "<![CDATA[" U */ + +/* + * Lex tokens returned by the lexical analyser to the parser + */ +typedef enum ExmlToken { + TOKEN_ERR, + TOKEN_TOO_BIG, /* Token is too big */ + TOKEN_CDATA, + TOKEN_COMMENT, + TOKEN_INSTRUCTIONS, + TOKEN_LS, /* "<" -- Opening a tag */ + TOKEN_LS_SLASH, /* "</" -- Closing a tag */ + TOKEN_GR, /* ">" -- End of an open tag */ + TOKEN_SLASH_GR, /* "/>" -- End of a solo tag */ + TOKEN_TEXT, + TOKEN_EQ, + TOKEN_EOF, + TOKEN_SPACE, +} ExmlToken; + +struct Exml; +typedef int (*ExmlHandler)(struct Exml *xp, int state, + const char *tagName, const char* attName, const char* value); +typedef int (*ExmlInputStream)(struct Exml *xp, void *arg, char *buf, int size); + +/* + * Per XML session structure + */ +typedef struct Exml { + ExmlHandler handler; /* Callback function */ + ExmlInputStream readFn; /* Read data function */ + MprBuf *inBuf; /* Input data queue */ + MprBuf *tokBuf; /* Parsed token buffer */ + int quoteChar; /* Open quote char of value being lexed, else 0 */ + int lineNumber; /* Current line no for debug */ + void *parseArg; /* Arg set by exmlSetParseArg() */ + void *inputArg; /* Arg passed to exmlSetInputStream() */ + char *errMsg; /* Error message text */ +} Exml; + +extern Exml *exmlOpen(MprCtx ctx, int initialSize, int maxSize); +extern void exmlClose(Exml *xp); +extern void exmlSetParserHandler(Exml *xp, 
ExmlHandler h); +extern void exmlSetInputStream(Exml *xp, ExmlInputStream s, void *arg); +extern int exmlParse(Exml *xp); +extern void exmlSetParseArg(Exml *xp, void *parseArg); +extern void *exmlGetParseArg(Exml *xp); +extern const char *exmlGetErrorMsg(Exml *xp); +extern int exmlGetLineNumber(Exml *xp); + +/******************************************************************************/ + +#endif /* _h_EXML */ diff --git a/source4/lib/appweb/ejs-2.0/exml/exmlParser.c b/source4/lib/appweb/ejs-2.0/exml/exmlParser.c new file mode 100644 index 0000000000..14871411a6 --- /dev/null +++ b/source4/lib/appweb/ejs-2.0/exml/exmlParser.c @@ -0,0 +1,752 @@ +/* + * exml.c -- A simple SAX style XML parser + */ + +/********************************* Description ********************************/ +/* + * This is a recursive descent parser for XML text files. It is a one-pass + * simple parser that invokes a user supplied callback for key tokens in the + * XML file. The user supplies a read function so that XML files can be parsed + * from disk or in-memory. 
+ */ +/********************************** Includes **********************************/ + +#include "exml.h" + +/****************************** Forward Declarations **************************/ +/* MOB -- FIX */ +#if BLD_FEATURE_EXML || 1 + +static int parseNext(Exml *xp, int state); +static ExmlToken getToken(Exml *xp, int state); +static int getNextChar(Exml *xp); +static int scanFor(Exml *xp, char *str); +static int putLastChar(Exml *xp, int c); +static void error(Exml *xp, char *fmt, ...); +static void trimToken(Exml *xp); + +/************************************ Code ************************************/ + +Exml *exmlOpen(MprCtx ctx, int initialSize, int maxSize) +{ + Exml *xp; + + xp = mprAllocTypeZeroed(ctx, Exml); + + xp->inBuf = mprCreateBuf(xp, EXML_BUFSIZE, EXML_BUFSIZE); + xp->tokBuf = mprCreateBuf(xp, initialSize, maxSize); + + return xp; +} + +/******************************************************************************/ + +void exmlClose(Exml *xp) +{ + mprAssert(xp); + + mprFree(xp); +} + +/******************************************************************************/ + +void exmlSetParserHandler(Exml *xp, ExmlHandler h) +{ + mprAssert(xp); + + xp->handler = h; +} + +/******************************************************************************/ + +void exmlSetInputStream(Exml *xp, ExmlInputStream s, void *arg) +{ + mprAssert(xp); + + xp->readFn = s; + xp->inputArg = arg; +} + +/******************************************************************************/ +/* + * Set the parse arg + */ + +void exmlSetParseArg(Exml *xp, void *parseArg) +{ + mprAssert(xp); + + xp->parseArg = parseArg; +} + +/******************************************************************************/ +/* + * Set the parse arg + */ + +void *exmlGetParseArg(Exml *xp) +{ + mprAssert(xp); + + return xp->parseArg; +} + +/******************************************************************************/ +/* + * Parse an XML file. Return 0 for success, -1 for error. 
+ */ + +int exmlParse(Exml *xp) +{ + mprAssert(xp); + + return parseNext(xp, EXML_BEGIN); +} + +/******************************************************************************/ +/* + * XML parser. This is a recursive descent parser. Return -1 for errors, 0 for + * EOF and 1 if there is still more data to parse. + */ + +static int parseNext(Exml *xp, int state) +{ + ExmlHandler handler; + ExmlToken token; + MprBuf *tokBuf; + char *tname, *aname; + int rc; + + mprAssert(state >= 0); + + tokBuf = xp->tokBuf; + handler = xp->handler; + tname = aname = 0; + rc = 0; + + /* + * In this parse loop, the state is never assigned EOF or ERR. In + * such cases we always return EOF or ERR. + */ + while (1) { + + token = getToken(xp, state); + + if (token == TOKEN_TOO_BIG) { + error(xp, "XML token is too big"); + goto err; + } + + switch (state) { + case EXML_BEGIN: /* ------------------------------------------ */ + /* + * Expect to get an element, comment or processing instruction + */ + switch (token) { + case TOKEN_EOF: + goto exit; + + case TOKEN_LS: + /* + * Recurse to handle the new element, comment etc. 
+ */ + rc = parseNext(xp, EXML_AFTER_LS); + if (rc < 0) { + goto exit; + } + break; + + default: + error(xp, "Syntax error"); + goto err; + } + break; + + case EXML_AFTER_LS: /* ------------------------------------------ */ + switch (token) { + case TOKEN_COMMENT: + state = EXML_COMMENT; + rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf)); + if (rc < 0) { + goto err; + } + rc = 1; + goto exit; + + case TOKEN_CDATA: + state = EXML_CDATA; + rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf)); + if (rc < 0) { + goto err; + } + rc = 1; + goto exit; + + case TOKEN_INSTRUCTIONS: + /* Just ignore processing instructions */ + rc = 1; + goto exit; + + case TOKEN_TEXT: + state = EXML_NEW_ELT; + tname = mprStrdup(xp, mprGetBufStart(tokBuf)); + if (tname == 0) { + rc = MPR_ERR_MEMORY; + goto exit; + } + rc = (*handler)(xp, state, tname, 0, 0); + if (rc < 0) { + goto err; + } + break; + + default: + error(xp, "Syntax error"); + goto err; + } + break; + + case EXML_NEW_ELT: /* ------------------------------------------ */ + /* + * We have seen the opening "<element" for a new element and have + * not yet seen the terminating ">" of the opening element. 
+ */ + switch (token) { + case TOKEN_TEXT: + /* + * Must be an attribute name + */ + aname = mprStrdup(xp, mprGetBufStart(tokBuf)); + token = getToken(xp, state); + if (token != TOKEN_EQ) { + error(xp, "Missing assignment for attribute \"%s\"", aname); + goto err; + } + + token = getToken(xp, state); + if (token != TOKEN_TEXT) { + error(xp, "Missing value for attribute \"%s\"", aname); + goto err; + } + state = EXML_NEW_ATT; + rc = (*handler)(xp, state, tname, aname, + mprGetBufStart(tokBuf)); + if (rc < 0) { + goto err; + } + state = EXML_NEW_ELT; + break; + + case TOKEN_GR: + /* + * This is ">" the termination of the opening element + */ + if (*tname == '\0') { + error(xp, "Missing element name"); + goto err; + } + + /* + * Tell the user that the opening element is now complete + */ + state = EXML_ELT_DEFINED; + rc = (*handler)(xp, state, tname, 0, 0); + if (rc < 0) { + goto err; + } + state = EXML_ELT_DATA; + break; + + case TOKEN_SLASH_GR: + /* + * If we see a "/>" then this is a solo element + */ + if (*tname == '\0') { + error(xp, "Missing element name"); + goto err; + } + state = EXML_SOLO_ELT_DEFINED; + rc = (*handler)(xp, state, tname, 0, 0); + if (rc < 0) { + goto err; + } + rc = 1; + goto exit; + + default: + error(xp, "Syntax error"); + goto err; + } + break; + + case EXML_ELT_DATA: /* -------------------------------------- */ + /* + * We have seen the full opening element "<name ...>" and now + * await data or another element. + */ + if (token == TOKEN_LS) { + /* + * Recurse to handle the new element, comment etc. 
+ */ + rc = parseNext(xp, EXML_AFTER_LS); + if (rc < 0) { + goto exit; + } + break; + + } else if (token == TOKEN_LS_SLASH) { + state = EXML_END_ELT; + break; + + } else if (token != TOKEN_TEXT) { + goto err; + } + if (mprGetBufLength(tokBuf) > 0) { + /* + * Pass the data between the element to the user + */ + rc = (*handler)(xp, state, tname, 0, mprGetBufStart(tokBuf)); + if (rc < 0) { + goto err; + } + } + break; + + case EXML_END_ELT: /* -------------------------------------- */ + if (token != TOKEN_TEXT) { + error(xp, "Missing closing element name for \"%s\"", tname); + goto err; + } + /* + * The closing element name must match the opening element name + */ + if (strcmp(tname, mprGetBufStart(tokBuf)) != 0) { + error(xp, + "Closing element name \"%s\" does not match on line %d" + "opening name \"%s\"", + mprGetBufStart(tokBuf), xp->lineNumber, tname); + goto err; + } + rc = (*handler)(xp, state, tname, 0, 0); + if (rc < 0) { + goto err; + } + if (getToken(xp, state) != TOKEN_GR) { + error(xp, "Syntax error"); + goto err; + } + return 1; + + case EXML_EOF: /* ---------------------------------------------- */ + goto exit; + + case EXML_ERR: /* ---------------------------------------------- */ + default: + goto err; + } + } + mprAssert(0); + +err: + rc = -1; + +exit: + mprFree(tname); + mprFree(aname); + + return rc; +} + +/******************************************************************************/ +/* + * Lexical analyser for XML. Return the next token reading input as required. + * It uses a one token look ahead and push back mechanism (LAR1 parser). + * Text token identifiers are left in the tokBuf parser buffer on exit. + * This Lex has special cases for the states EXML_ELT_DATA where we + * have an optimized read of element data, and EXML_AFTER_LS where we + * distinguish between element names, processing instructions and comments. 
+ */ + +static ExmlToken getToken(Exml *xp, int state) +{ + MprBuf *tokBuf, *inBuf; + uchar *cp; + int c, rc; + + tokBuf = xp->tokBuf; + inBuf = xp->inBuf; + + mprAssert(state >= 0); + + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + mprFlushBuf(tokBuf); + + /* + * Special case parsing for names and for element data. We do this for + * performance so we can return to the caller the largest token possible + */ + if (state == EXML_ELT_DATA) { + /* + * Read all the data up to the start of the closing element "<" or the + * start of a sub-element. + */ +#if UNUSED + while (isspace(c)) { + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + } +#endif + if (c == '<') { + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + if (c == '/') { + return TOKEN_LS_SLASH; + } + putLastChar(xp, c); + return TOKEN_LS; + } + do { + if (mprPutCharToBuf(tokBuf, c) < 0) { + return TOKEN_TOO_BIG; + } + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + } while (c != '<'); + + /* + * Put back the last look-ahead character + */ + putLastChar(xp, c); + + /* + * If all white space, then zero the token buffer + */ + for (cp = tokBuf->start; *cp; cp++) { + if (!isspace(*cp)) { + return TOKEN_TEXT; + } + } + mprFlushBuf(tokBuf); + return TOKEN_TEXT; + } + + while (1) { + switch (c) { + case ' ': + case '\n': + case '\t': + case '\r': + break; + + case '<': + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + if (c == '/') { + return TOKEN_LS_SLASH; + } + putLastChar(xp, c); + return TOKEN_LS; + + case '=': + return TOKEN_EQ; + + case '>': + return TOKEN_GR; + + case '/': + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + if (c == '>') { + return TOKEN_SLASH_GR; + } + return TOKEN_ERR; + + case '\"': + case '\'': + xp->quoteChar = c; + /* Fall through */ + + default: + /* + * We handle element names, attribute names and attribute values + * here. We do NOT handle data between elements here. Read the + * token. 
Stop on white space or a closing element ">" + */ + if (xp->quoteChar) { + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + while (c != xp->quoteChar) { + if (mprPutCharToBuf(tokBuf, c) < 0) { + return TOKEN_TOO_BIG; + } + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + } + xp->quoteChar = 0; + + } else { + while (!isspace(c) && c != '>' && c != '/' && c != '=') { + if (mprPutCharToBuf(tokBuf, c) < 0) { + return TOKEN_TOO_BIG; + } + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + } + putLastChar(xp, c); + } + if (mprGetBufLength(tokBuf) <= 0) { + return TOKEN_ERR; + } + mprAddNullToBuf(tokBuf); + + if (state == EXML_AFTER_LS) { + /* + * If we are just inside an element "<", then analyze what we + * have to see if we have an element name, instruction or + * comment. Tokbuf will hold "?" for instructions or "!--" + * for comments. + */ + if (mprLookAtNextCharInBuf(tokBuf) == '?') { + /* Just ignore processing instructions */ + rc = scanFor(xp, "?>"); + if (rc < 0) { + return TOKEN_TOO_BIG; + } else if (rc == 0) { + return TOKEN_ERR; + } + return TOKEN_INSTRUCTIONS; + + } else if (mprLookAtNextCharInBuf(tokBuf) == '!') { + /* + * First discard the comment leadin "!--" and eat leading + * white space. 
+ */ + if (strcmp((char*) tokBuf->start, "![CDATA[") == 0) { + mprFlushBuf(tokBuf); +#if UNUSED + c = mprLookAtNextCharInBuf(inBuf); + while (isspace(c)) { + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + c = mprLookAtNextCharInBuf(inBuf); + } +#endif + rc = scanFor(xp, "]]>"); + if (rc < 0) { + return TOKEN_TOO_BIG; + } else if (rc == 0) { + return TOKEN_ERR; + } + return TOKEN_CDATA; + + } else { + mprFlushBuf(tokBuf); +#if UNUSED + c = mprLookAtNextCharInBuf(inBuf); + while (isspace(c)) { + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + c = mprLookAtNextCharInBuf(inBuf); + } +#endif + rc = scanFor(xp, "-->"); + if (rc < 0) { + return TOKEN_TOO_BIG; + } else if (rc == 0) { + return TOKEN_ERR; + } + return TOKEN_COMMENT; + } + } + } + trimToken(xp); + return TOKEN_TEXT; + } + if ((c = getNextChar(xp)) < 0) { + return TOKEN_EOF; + } + } + + /* Should never get here */ + mprAssert(0); + return TOKEN_ERR; +} + +/******************************************************************************/ +/* + * Scan for a pattern. Eat and discard input up to the pattern. Return 1 if + * the pattern was found, return 0 if not found. Return < 0 on errors. + */ + +static int scanFor(Exml *xp, char *str) +{ + MprBuf *tokBuf; + char *cp; + int c; + + mprAssert(str); + + tokBuf = xp->tokBuf; + + while (1) { + for (cp = str; *cp; cp++) { + if ((c = getNextChar(xp)) < 0) { + return 0; + } + if (tokBuf) { + if (mprPutCharToBuf(tokBuf, c) < 0) { + return -1; + } + } + if (c != *cp) { + break; + } + } + if (*cp == '\0') { + /* + * Remove the pattern from the tokBuf + */ + if (tokBuf) { + mprAdjustBufEnd(tokBuf, -(int) strlen(str)); + trimToken(xp); + } + return 1; + } + } +} + +/******************************************************************************/ +/* + * Get another character. We read and buffer blocks of data if we need more + * data to parse. 
+ */ + +static int getNextChar(Exml *xp) +{ + MprBuf *inBuf; + char c; + int l; + + inBuf = xp->inBuf; + if (mprGetBufLength(inBuf) <= 0) { + /* + * Flush to reset the servp/endp pointers to the start of the buffer + * so we can do a maximal read + */ + mprFlushBuf(inBuf); + l = (xp->readFn)(xp, xp->inputArg, mprGetBufStart(inBuf), + mprGetBufLinearSpace(inBuf)); + if (l <= 0) { + return -1; + } + mprAdjustBufEnd(inBuf, l); + } + c = mprGetCharFromBuf(inBuf); + + if (c == '\n') { + xp->lineNumber++; + } + return c; +} + +/******************************************************************************/ +/* + * Put back a character in the input buffer + */ + +static int putLastChar(Exml *xp, int c) +{ + if (mprInsertCharToBuf(xp->inBuf, (char) c) < 0) { + mprAssert(0); + return -1; + } + if (c == '\n') { + xp->lineNumber--; + } + return 0; +} + +/******************************************************************************/ +/* + * Output a parse message + */ + +static void error(Exml *xp, char *fmt, ...) 
+{ + va_list args; + char *buf; + + mprAssert(fmt); + + va_start(args, fmt); + mprAllocVsprintf(MPR_LOC_ARGS(xp), &buf, MPR_MAX_STRING, fmt, args); + va_end(args); + + /* + * MOB need to add the failing line text and a pointer to which column + */ + mprFree(xp->errMsg); + mprAllocSprintf(MPR_LOC_ARGS(xp), &xp->errMsg, MPR_MAX_STRING, + "XML error: %s\nAt line %d\n", buf, xp->lineNumber); + + mprFree(buf); +} + +/******************************************************************************/ +/* + * Remove trailing whitespace in a token and ensure it is terminated with + * a NULL for easy parsing + */ + +static void trimToken(Exml *xp) +{ + while (isspace(mprLookAtLastCharInBuf(xp->tokBuf))) { + mprAdjustBufEnd(xp->tokBuf, -1); + } + mprAddNullToBuf(xp->tokBuf); +} + +/******************************************************************************/ + +const char *exmlGetErrorMsg(Exml *xp) +{ + if (xp->errMsg == 0) { + return ""; + } + return xp->errMsg; +} + +/******************************************************************************/ + +int exmlGetLineNumber(Exml *xp) +{ + return xp->lineNumber; +} + +/******************************************************************************/ +#else + +void exmlParserDummy() {} +#endif /* BLD_FEATURE_EXML */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim:tw=78 + * vim600: sw=4 ts=4 fdm=marker + * vim<600: sw=4 ts=4 + */ diff --git a/source4/lib/appweb/ejs-2.0/exml/files b/source4/lib/appweb/ejs-2.0/exml/files new file mode 100644 index 0000000000..0f10ea44dd --- /dev/null +++ b/source4/lib/appweb/ejs-2.0/exml/files @@ -0,0 +1 @@ +${BLD_OBJ_DIR}/exmlParser${BLD_OBJ} |