r18925: Add current snapshot of the ejs-2.0 code. Tridge, will you be incorporating this?

(This used to be commit 917af234a8d517f82bd42256a940608a16b988f4)
author: Derrell Lipman <derrell@samba.org> 2006-09-26 16:58:27 +0000
committer: Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 14:20:21 -0500
commit: 77e8402dd68079c0e245fc8826daf2c6ad334766 (patch)
tree: 64f65c7d3fb7f3aa970311f70e3f8bae4c0fc40c /source4/lib/appweb/ejs-2.0/exml/exmlParser.c
parent: 06de0f0b743056ba845de000339d4c3beb7cb845 (diff)
download: samba-77e8402dd68079c0e245fc8826daf2c6ad334766.tar.gz
samba-77e8402dd68079c0e245fc8826daf2c6ad334766.tar.bz2
samba-77e8402dd68079c0e245fc8826daf2c6ad334766.zip
1 files changed, 752 insertions, 0 deletions
diff --git a/source4/lib/appweb/ejs-2.0/exml/exmlParser.c b/source4/lib/appweb/ejs-2.0/exml/exmlParser.c
new file mode 100644
index 0000000000..14871411a6
--- /dev/null
+++ b/source4/lib/appweb/ejs-2.0/exml/exmlParser.c
@@ -0,0 +1,752 @@
+/*
+ *	exml.c -- A simple SAX style XML parser
+ */
+
+/********************************* Description ********************************/
+/*
+ *	This is a recursive descent parser for XML text files. It is a one-pass
+ *	simple parser that invokes a user supplied callback for key tokens in the
+ *	XML file. The user supplies a read function so that XML files can be parsed
+ *	from disk or in-memory. 
+ */
+/********************************** Includes **********************************/
+
+#include	"exml.h"
+
+/****************************** Forward Declarations **************************/
+/* MOB -- FIX */
+#if BLD_FEATURE_EXML || 1
+
+static int 		 parseNext(Exml *xp, int state);
+static ExmlToken getToken(Exml *xp, int state);
+static int 		 getNextChar(Exml *xp);
+static int 		 scanFor(Exml *xp, char *str);
+static int 		 putLastChar(Exml *xp, int c);
+static void 	 error(Exml *xp, char *fmt, ...);
+static void 	 trimToken(Exml *xp);
+
+/************************************ Code ************************************/
+
+Exml *exmlOpen(MprCtx ctx, int initialSize, int maxSize)
+{
+	Exml	*xp;
+
+	xp = mprAllocTypeZeroed(ctx, Exml);
+	
+	xp->inBuf = mprCreateBuf(xp, EXML_BUFSIZE, EXML_BUFSIZE);
+	xp->tokBuf = mprCreateBuf(xp, initialSize, maxSize);
+
+	return xp;
+}
+
+/******************************************************************************/
+
+void exmlClose(Exml *xp)
+{
+	mprAssert(xp);
+
+	mprFree(xp);
+}
+
+/******************************************************************************/
+
+void exmlSetParserHandler(Exml *xp, ExmlHandler h)
+{
+	mprAssert(xp);
+
+	xp->handler = h;
+}
+
+/******************************************************************************/
+
+void exmlSetInputStream(Exml *xp, ExmlInputStream s, void *arg)
+{
+	mprAssert(xp);
+
+	xp->readFn = s;
+	xp->inputArg = arg;
+}
+
+/******************************************************************************/
+/*
+ *	Set the parse arg
+ */ 
+
+void exmlSetParseArg(Exml *xp, void *parseArg)
+{
+	mprAssert(xp);
+
+	xp->parseArg = parseArg;
+}
+
+/******************************************************************************/
+/*
+ *	Set the parse arg
+ */ 
+
+void *exmlGetParseArg(Exml *xp)
+{
+	mprAssert(xp);
+
+	return xp->parseArg;
+}
+
+/******************************************************************************/
+/*
+ *	Parse an XML file. Return 0 for success, -1 for error.
+ */ 
+
+int	exmlParse(Exml *xp)
+{
+	mprAssert(xp);
+
+	return parseNext(xp, EXML_BEGIN);
+}
+
+/******************************************************************************/
+/*
+ *	XML parser. This is a recursive descent parser. Return -1 for errors, 0 for
+ *	EOF and 1 if there is still more data to parse.
+ */
+
+static int parseNext(Exml *xp, int state)
+{
+	ExmlHandler	handler;
+	ExmlToken	token;
+	MprBuf		*tokBuf;
+	char		*tname, *aname;
+	int			rc;
+
+	mprAssert(state >= 0);
+
+	tokBuf = xp->tokBuf;
+	handler = xp->handler;
+	tname = aname = 0;
+	rc = 0;
+	
+	/*
+	 *	In this parse loop, the state is never assigned EOF or ERR. In
+	 *	such cases we always return EOF or ERR.
+	 */
+	while (1) {
+
+		token = getToken(xp, state);
+
+		if (token == TOKEN_TOO_BIG) {
+			error(xp, "XML token is too big");
+			goto err;
+		}
+
+		switch (state) {
+		case EXML_BEGIN:		/* ------------------------------------------ */
+			/*
+			 *	Expect to get an element, comment or processing instruction 
+			 */
+			switch (token) {
+			case TOKEN_EOF:
+				goto exit;
+
+			case TOKEN_LS:
+				/*
+				 *	Recurse to handle the new element, comment etc.
+				 */
+				rc = parseNext(xp, EXML_AFTER_LS);
+				if (rc < 0) {
+					goto exit;
+				}
+				break;
+
+			default:
+				error(xp, "Syntax error");
+				goto err;
+			}
+			break;
+
+		case EXML_AFTER_LS: /* ------------------------------------------ */
+			switch (token) {
+			case TOKEN_COMMENT:
+				state = EXML_COMMENT;
+				rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf));
+				if (rc < 0) {
+					goto err;
+				}
+				rc = 1;
+				goto exit;
+
+			case TOKEN_CDATA:
+				state = EXML_CDATA;
+				rc = (*handler)(xp, state, "!--", 0, mprGetBufStart(tokBuf));
+				if (rc < 0) {
+					goto err;
+				}
+				rc = 1;
+				goto exit;
+
+			case TOKEN_INSTRUCTIONS:
+				/* Just ignore processing instructions */
+				rc = 1;
+				goto exit;
+
+			case TOKEN_TEXT:
+				state = EXML_NEW_ELT;
+				tname = mprStrdup(xp, mprGetBufStart(tokBuf));
+				if (tname == 0) {
+					rc = MPR_ERR_MEMORY;
+					goto exit;
+				}
+				rc = (*handler)(xp, state, tname, 0, 0);
+				if (rc < 0) {
+					goto err;
+				}
+				break;
+
+			default:
+				error(xp, "Syntax error");
+				goto err;
+			}
+			break;
+
+		case EXML_NEW_ELT: 	/* ------------------------------------------ */
+			/*
+			 *	We have seen the opening "<element" for a new element and have
+ 			 *	not yet seen the terminating ">" of the opening element.
+			 */
+			switch (token) {
+			case TOKEN_TEXT:
+				/*
+				 *	Must be an attribute name
+				 */
+				aname = mprStrdup(xp, mprGetBufStart(tokBuf));
+				token = getToken(xp, state);
+				if (token != TOKEN_EQ) {
+					error(xp, "Missing assignment for attribute \"%s\"", aname);
+					goto err;
+				}
+
+				token = getToken(xp, state);
+				if (token != TOKEN_TEXT) {
+					error(xp, "Missing value for attribute \"%s\"", aname);
+					goto err;
+				}
+				state = EXML_NEW_ATT;
+				rc = (*handler)(xp, state, tname, aname,
+						mprGetBufStart(tokBuf));
+				if (rc < 0) {
+					goto err;
+				}
+				state = EXML_NEW_ELT;
+				break;
+
+			case TOKEN_GR:
+				/*
+ 				 *	This is ">" the termination of the opening element
+				 */
+				if (*tname == '\0') {
+					error(xp, "Missing element name");
+					goto err;
+				}
+
+				/*
+				 *	Tell the user that the opening element is now complete
+ 				 */
+				state = EXML_ELT_DEFINED;
+				rc = (*handler)(xp, state, tname, 0, 0);
+				if (rc < 0) {
+					goto err;
+				}
+				state = EXML_ELT_DATA;
+				break;
+
+			case TOKEN_SLASH_GR:
+				/*
+				 *	If we see a "/>" then this is a solo element
+ 				 */
+				if (*tname == '\0') {
+					error(xp, "Missing element name");
+					goto err;
+				}
+				state = EXML_SOLO_ELT_DEFINED;
+				rc = (*handler)(xp, state, tname, 0, 0);
+				if (rc < 0) {
+					goto err;
+				}
+				rc = 1;
+				goto exit;
+	
+			default:
+				error(xp, "Syntax error");
+				goto err;
+			}
+			break;
+
+		case EXML_ELT_DATA:		/* -------------------------------------- */
+			/*
+			 *	We have seen the full opening element "<name ...>" and now 
+			 *	await data or another element.
+			 */
+			if (token == TOKEN_LS) {
+				/*
+				 *	Recurse to handle the new element, comment etc.
+				 */
+				rc = parseNext(xp, EXML_AFTER_LS);
+				if (rc < 0) {
+					goto exit;
+				}
+				break;
+
+			} else if (token == TOKEN_LS_SLASH) {
+				state = EXML_END_ELT;
+				break;
+
+			} else if (token != TOKEN_TEXT) {
+				goto err;
+			}
+			if (mprGetBufLength(tokBuf) > 0) {
+				/*
+				 *	Pass the data between the element to the user
+				 */
+				rc = (*handler)(xp, state, tname, 0, mprGetBufStart(tokBuf));
+				if (rc < 0) {
+					goto err;
+				}
+			}
+			break;
+
+		case EXML_END_ELT:			/* -------------------------------------- */
+			if (token != TOKEN_TEXT) {
+				error(xp, "Missing closing element name for \"%s\"", tname);
+				goto err;
+			}
+			/*
+			 *	The closing element name must match the opening element name 
+			 */
+			if (strcmp(tname, mprGetBufStart(tokBuf)) != 0) {
+				error(xp, 
+					"Closing element name \"%s\" does not match on line %d"
+					"opening name \"%s\"",
+					mprGetBufStart(tokBuf), xp->lineNumber, tname);
+				goto err;
+			}
+			rc = (*handler)(xp, state, tname, 0, 0);
+			if (rc < 0) {
+				goto err;
+			}
+			if (getToken(xp, state) != TOKEN_GR) {
+				error(xp, "Syntax error");
+				goto err;
+			}
+			return 1;
+
+		case EXML_EOF: 		/* ---------------------------------------------- */
+			goto exit;
+
+		case EXML_ERR:  	/* ---------------------------------------------- */
+		default:
+			goto err;
+		}
+	}
+	mprAssert(0);
+
+err:
+	rc = -1;
+
+exit:
+	mprFree(tname);
+	mprFree(aname);
+
+	return rc;
+}
+
+/******************************************************************************/
+/*
+ *	Lexical analyser for XML. Return the next token reading input as required.
+ *	It uses a one token look ahead and push back mechanism (LAR1 parser).
+ *	Text token identifiers are left in the tokBuf parser buffer on exit.
+ *	This Lex has special cases for the states EXML_ELT_DATA where we
+ *	have an optimized read of element data, and EXML_AFTER_LS where we 
+ *	distinguish between element names, processing instructions and comments. 
+ */
+
+static ExmlToken getToken(Exml *xp, int state)
+{
+	MprBuf		*tokBuf, *inBuf;
+	uchar		*cp;
+	int			c, rc;
+
+	tokBuf = xp->tokBuf;
+	inBuf = xp->inBuf;
+
+	mprAssert(state >= 0);
+
+	if ((c = getNextChar(xp)) < 0) {
+		return TOKEN_EOF;
+	}
+	mprFlushBuf(tokBuf);
+
+	/*
+	 *	Special case parsing for names and for element data. We do this for
+	 *	performance so we can return to the caller the largest token possible
+	 */
+	if (state == EXML_ELT_DATA) {
+		/*
+		 *	Read all the data up to the start of the closing element "<" or the
+		 *	start of a sub-element.
+ 		 */
+#if UNUSED
+		while (isspace(c)) {
+			if ((c = getNextChar(xp)) < 0) {
+				return TOKEN_EOF;
+			}
+		}
+#endif
+		if (c == '<') {
+			if ((c = getNextChar(xp)) < 0) {
+				return TOKEN_EOF;
+			}
+			if (c == '/') {
+				return TOKEN_LS_SLASH;
+			}
+			putLastChar(xp, c);
+			return TOKEN_LS;
+		}
+		do {
+			if (mprPutCharToBuf(tokBuf, c) < 0) {
+				return TOKEN_TOO_BIG;
+			}
+			if ((c = getNextChar(xp)) < 0) {
+				return TOKEN_EOF;
+			}
+		} while (c != '<');
+
+		/*
+		 *	Put back the last look-ahead character
+		 */
+		putLastChar(xp, c);
+
+		/*
+		 *	If all white space, then zero the token buffer
+		 */
+		for (cp = tokBuf->start; *cp; cp++) {
+			if (!isspace(*cp)) {
+				return TOKEN_TEXT;
+			}
+		}
+		mprFlushBuf(tokBuf);
+		return TOKEN_TEXT;
+	}
+
+	while (1) {
+		switch (c) {
+		case ' ':
+		case '\n':
+		case '\t':
+		case '\r':
+			break;
+
+		case '<':
+			if ((c = getNextChar(xp)) < 0) {
+				return TOKEN_EOF;
+			}
+			if (c == '/') {
+				return TOKEN_LS_SLASH;
+			}
+			putLastChar(xp, c);
+			return TOKEN_LS;
+	
+		case '=':
+			return TOKEN_EQ;
+
+		case '>':
+			return TOKEN_GR;
+
+		case '/':
+			if ((c = getNextChar(xp)) < 0) {
+				return TOKEN_EOF;
+			}
+			if (c == '>') {
+				return TOKEN_SLASH_GR;
+			}
+			return TOKEN_ERR;
+		
+		case '\"':
+		case '\'':
+			xp->quoteChar = c;
+			/* Fall through */
+
+		default:
+			/*
+ 			 *	We handle element names, attribute names and attribute values 
+			 *	here. We do NOT handle data between elements here. Read the 
+			 *	token.  Stop on white space or a closing element ">"
+			 */
+			if (xp->quoteChar) {
+				if ((c = getNextChar(xp)) < 0) {
+					return TOKEN_EOF;
+				}
+				while (c != xp->quoteChar) {
+					if (mprPutCharToBuf(tokBuf, c) < 0) {
+						return TOKEN_TOO_BIG;
+					}
+					if ((c = getNextChar(xp)) < 0) {
+						return TOKEN_EOF;
+					}
+				}
+				xp->quoteChar = 0;
+
+			} else {
+				while (!isspace(c) && c != '>' && c != '/' && c != '=') {
+					if (mprPutCharToBuf(tokBuf, c) < 0) {
+						return TOKEN_TOO_BIG;
+					}
+					if ((c = getNextChar(xp)) < 0) {
+						return TOKEN_EOF;
+					}
+				}
+				putLastChar(xp, c);
+			}
+			if (mprGetBufLength(tokBuf) <= 0) {
+				return TOKEN_ERR;
+			}
+			mprAddNullToBuf(tokBuf);
+
+			if (state == EXML_AFTER_LS) {
+				/*
+				 *	If we are just inside an element "<", then analyze what we
+				 *	have to see if we have an element name, instruction or
+ 				 *	comment. Tokbuf will hold "?" for instructions or "!--"
+				 *	for comments.
+				 */
+				if (mprLookAtNextCharInBuf(tokBuf) == '?') {
+					/*	Just ignore processing instructions */
+					rc = scanFor(xp, "?>");
+					if (rc < 0) {
+						return TOKEN_TOO_BIG;
+					} else if (rc == 0) {
+						return TOKEN_ERR;
+					}
+					return TOKEN_INSTRUCTIONS;
+
+				} else if (mprLookAtNextCharInBuf(tokBuf) == '!') {
+					/*
+					 *	First discard the comment leadin "!--" and eat leading 
+					 *	white space.
+					 */
+					if (strcmp((char*) tokBuf->start, "![CDATA[") == 0) {
+						mprFlushBuf(tokBuf);
+#if UNUSED
+						c = mprLookAtNextCharInBuf(inBuf);
+						while (isspace(c)) {
+							if ((c = getNextChar(xp)) < 0) {
+								return TOKEN_EOF;
+							}
+							c = mprLookAtNextCharInBuf(inBuf);
+						}
+#endif
+						rc = scanFor(xp, "]]>");
+						if (rc < 0) {
+							return TOKEN_TOO_BIG;
+						} else if (rc == 0) {
+							return TOKEN_ERR;
+						}
+						return TOKEN_CDATA;
+
+					} else {
+						mprFlushBuf(tokBuf);
+#if UNUSED
+						c = mprLookAtNextCharInBuf(inBuf);
+						while (isspace(c)) {
+							if ((c = getNextChar(xp)) < 0) {
+								return TOKEN_EOF;
+							}
+							c = mprLookAtNextCharInBuf(inBuf);
+						}
+#endif
+						rc = scanFor(xp, "-->");
+						if (rc < 0) {
+							return TOKEN_TOO_BIG;
+						} else if (rc == 0) {
+							return TOKEN_ERR;
+						}
+						return TOKEN_COMMENT;
+					}
+				}
+			}
+			trimToken(xp);
+			return TOKEN_TEXT;
+		}
+		if ((c = getNextChar(xp)) < 0) {
+			return TOKEN_EOF;
+		}
+	}
+
+	/* Should never get here */
+	mprAssert(0);
+	return TOKEN_ERR;
+}
+
+/******************************************************************************/
+/*
+ *	Scan for a pattern. Eat and discard input up to the pattern. Return 1 if
+ *	the pattern was found, return 0 if not found. Return < 0 on errors.
+ */
+
+static int scanFor(Exml *xp, char *str)
+{
+	MprBuf	*tokBuf;
+	char	*cp;
+	int		c;
+
+	mprAssert(str);
+
+	tokBuf = xp->tokBuf;
+
+	while (1) {
+		for (cp = str; *cp; cp++) {
+			if ((c = getNextChar(xp)) < 0) {
+				return 0;
+			}
+			if (tokBuf) {
+				if (mprPutCharToBuf(tokBuf, c) < 0) {
+					return -1;
+				}
+			}
+			if (c != *cp) {
+				break;
+			}
+		}
+		if (*cp == '\0') {
+			/*
+			 *	Remove the pattern from the tokBuf
+			 */
+			if (tokBuf) {
+				mprAdjustBufEnd(tokBuf, -(int) strlen(str));
+				trimToken(xp);
+			}
+			return 1;
+		}
+	}
+}
+
+/******************************************************************************/
+/*
+ *	Get another character. We read and buffer blocks of data if we need more
+ *	data to parse.
+ */
+
+static int getNextChar(Exml *xp)
+{
+	MprBuf	*inBuf;
+	char	c;
+	int		l;
+
+	inBuf = xp->inBuf;
+	if (mprGetBufLength(inBuf) <= 0) {
+		/*
+ 		 *	Flush to reset the servp/endp pointers to the start of the buffer
+		 *	so we can do a maximal read 
+		 */
+		mprFlushBuf(inBuf);
+		l = (xp->readFn)(xp, xp->inputArg, mprGetBufStart(inBuf), 
+			mprGetBufLinearSpace(inBuf));
+		if (l <= 0) {
+			return -1;
+		}
+		mprAdjustBufEnd(inBuf, l);
+	}
+	c = mprGetCharFromBuf(inBuf);
+
+	if (c == '\n') {
+		xp->lineNumber++;
+	}
+	return c;
+}
+
+/******************************************************************************/
+/*
+ *	Put back a character in the input buffer
+ */
+
+static int putLastChar(Exml *xp, int c)
+{
+	if (mprInsertCharToBuf(xp->inBuf, (char) c) < 0) {
+		mprAssert(0);
+		return -1;
+	}
+	if (c == '\n') {
+		xp->lineNumber--;
+	}
+	return 0;
+}
+
+/******************************************************************************/
+/*
+ *	Output a parse message
+ */ 
+
+static void error(Exml *xp, char *fmt, ...)
+{
+	va_list		args;
+	char		*buf;
+
+	mprAssert(fmt);
+
+	va_start(args, fmt);
+	mprAllocVsprintf(MPR_LOC_ARGS(xp), &buf, MPR_MAX_STRING, fmt, args);
+	va_end(args);
+
+	/*
+	 *	MOB need to add the failing line text and a pointer to which column
+	 */
+	mprFree(xp->errMsg);
+	mprAllocSprintf(MPR_LOC_ARGS(xp), &xp->errMsg, MPR_MAX_STRING, 
+		"XML error: %s\nAt line %d\n", buf, xp->lineNumber);
+
+	mprFree(buf);
+}
+
+/******************************************************************************/
+/*
+ *	Remove trailing whitespace in a token and ensure it is terminated with
+ *	a NULL for easy parsing
+ */
+
+static void trimToken(Exml *xp)
+{
+	while (isspace(mprLookAtLastCharInBuf(xp->tokBuf))) {
+		mprAdjustBufEnd(xp->tokBuf, -1);
+	}
+	mprAddNullToBuf(xp->tokBuf);
+}
+
+/******************************************************************************/
+
+const char *exmlGetErrorMsg(Exml *xp)
+{
+	if (xp->errMsg == 0) {
+		return "";
+	}
+	return xp->errMsg;
+}
+
+/******************************************************************************/
+
+int exmlGetLineNumber(Exml *xp)
+{
+	return xp->lineNumber;
+}
+
+/******************************************************************************/
+#else
+
+void exmlParserDummy() {}
+#endif /* BLD_FEATURE_EXML */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ * vim:tw=78
+ * vim600: sw=4 ts=4 fdm=marker
+ * vim<600: sw=4 ts=4
+ */
author	Derrell Lipman <derrell@samba.org>	2006-09-26 16:58:27 +0000
committer	Gerald (Jerry) Carter <jerry@samba.org>	2007-10-10 14:20:21 -0500
commit	77e8402dd68079c0e245fc8826daf2c6ad334766 (patch)
tree	64f65c7d3fb7f3aa970311f70e3f8bae4c0fc40c /source4/lib/appweb/ejs-2.0/exml/exmlParser.c
parent	06de0f0b743056ba845de000339d4c3beb7cb845 (diff)
download	samba-77e8402dd68079c0e245fc8826daf2c6ad334766.tar.gz samba-77e8402dd68079c0e245fc8826daf2c6ad334766.tar.bz2 samba-77e8402dd68079c0e245fc8826daf2c6ad334766.zip