From 14ade23914b328fcb5924488c2416dfa7748e4ab Mon Sep 17 00:00:00 2001 From: Tim Potter Date: Sun, 29 May 2005 03:53:36 +0000 Subject: r7065: Move ejs from web_server to lib so it can be shared with smbscript. (This used to be commit b83dc8fbfb9ffe30654bc4869398f50dd9ccccb7) --- source4/lib/ejs/ejsLex.c | 910 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 910 insertions(+) create mode 100644 source4/lib/ejs/ejsLex.c (limited to 'source4/lib/ejs/ejsLex.c') diff --git a/source4/lib/ejs/ejsLex.c b/source4/lib/ejs/ejsLex.c new file mode 100644 index 0000000000..b0d6483c2a --- /dev/null +++ b/source4/lib/ejs/ejsLex.c @@ -0,0 +1,910 @@ +/* + * @file ejsLex.c + * @brief EJS Lexical Analyser + * @overview EJS lexical analyser. This implementes a lexical analyser + * for a subset of the JavaScript language. + */ +/********************************* Copyright **********************************/ +/* + * @copy default.g + * + * Copyright (c) Mbedthis Software LLC, 2003-2005. All Rights Reserved. + * Portions Copyright (c) GoAhead Software, 1995-2000. All Rights Reserved. + * + * This software is distributed under commercial and open source licenses. + * You may use the GPL open source license described below or you may acquire + * a commercial license from Mbedthis Software. You agree to be fully bound + * by the terms of either license. Consult the LICENSE.TXT distributed with + * this software for full details. + * + * This software is open source; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. See the GNU General Public License for more + * details at: http://www.mbedthis.com/downloads/gplLicense.html + * + * This program is distributed WITHOUT ANY WARRANTY; without even the + * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * This GPL license does NOT permit incorporating this software into + * proprietary programs. If you are unable to comply with the GPL, you must + * acquire a commercial license to use this software. Commercial licenses + * for this software and support services are available from Mbedthis + * Software at http://www.mbedthis.com + * + * @end + */ +/********************************** Includes **********************************/ + +#include "lib/ejs/ejsInternal.h" + +#if BLD_FEATURE_EJS + +/****************************** Forward Declarations **************************/ + +static int getLexicalToken(Ejs *ep, int state); +static int tokenAddChar(Ejs *ep, int c); +static int inputGetc(Ejs *ep); +static void inputPutback(Ejs *ep, int c); +static int charConvert(Ejs *ep, int base, int maxDig); + +/************************************* Code ***********************************/ +/* + * Open a new input script + */ + +int ejsLexOpenScript(Ejs *ep, char *script) +{ + EjsInput *ip; + + mprAssert(ep); + mprAssert(script); + + if ((ep->input = mprMalloc(sizeof(EjsInput))) == NULL) { + return -1; + } + ip = ep->input; + memset(ip, 0, sizeof(*ip)); + +/* + * Create the parse token buffer and script buffer + */ + ip->tokbuf = mprMalloc(EJS_PARSE_INCR); + ip->tokSize = EJS_PARSE_INCR; + ip->tokServp = ip->tokbuf; + ip->tokEndp = ip->tokbuf; + + ip->script = mprStrdup(script); + ip->scriptSize = strlen(script); + ip->scriptServp = ip->script; + + ip->lineNumber = 1; + ip->lineLength = 0; + ip->lineColumn = 0; + ip->line = NULL; + + ip->putBackIndex = -1; + + return 0; +} + +/******************************************************************************/ +/* + * Close the input script + */ + +void ejsLexCloseScript(Ejs *ep) +{ + EjsInput *ip; + int i; + + mprAssert(ep); + + ip = ep->input; + mprAssert(ip); + + for (i = 0; i < EJS_TOKEN_STACK; i++) { + mprFree(ip->putBack[i].token); + ip->putBack[i].token = 0; + } + + mprFree(ip->line); + mprFree(ip->tokbuf); + mprFree(ip->script); + + mprFree(ip); +} + +/******************************************************************************/ +/* + * Initialize an input state structure + */ + +int ejsInitInputState(EjsInput *ip) +{ + mprAssert(ip); + + memset(ip, 0, sizeof(*ip)); + ip->putBackIndex = -1; + + return 0; +} +/******************************************************************************/ +/* + * Save the input state + */ + +void ejsLexSaveInputState(Ejs *ep, EjsInput *state) +{ + EjsInput *ip; + int i; + + mprAssert(ep); + + ip = ep->input; + mprAssert(ip); + + *state = *ip; + + for (i = 0; i < ip->putBackIndex; i++) { + state->putBack[i].token = mprStrdup(ip->putBack[i].token); + state->putBack[i].id = ip->putBack[i].id; + } + for (; i < EJS_TOKEN_STACK; i++) { + state->putBack[i].token = 0; + } + + state->line = mprMalloc(ip->lineLength); + mprStrcpy(state->line, ip->lineLength, ip->line); + + state->lineColumn = ip->lineColumn; + state->lineNumber = ip->lineNumber; + state->lineLength = ip->lineLength; +} + +/******************************************************************************/ +/* + * Restore the input state + */ + +void ejsLexRestoreInputState(Ejs *ep, EjsInput *state) +{ + EjsInput *ip; + int i; + + mprAssert(ep); + mprAssert(state); + + ip = ep->input; + mprAssert(ip); + + ip->tokbuf = state->tokbuf; + ip->tokServp = state->tokServp; + ip->tokEndp = state->tokEndp; + ip->tokSize = state->tokSize; + + ip->script = state->script; + ip->scriptServp = state->scriptServp; + ip->scriptSize = state->scriptSize; + + ip->putBackIndex = state->putBackIndex; + for (i = 0; i < ip->putBackIndex; i++) { + mprFree(ip->putBack[i].token); + ip->putBack[i].id = state->putBack[i].id; + ip->putBack[i].token = mprStrdup(state->putBack[i].token); + } + + mprFree(ip->line); + ip->line = mprMalloc(state->lineLength); + mprStrcpy(ip->line, state->lineLength, state->line); + + ip->lineColumn = state->lineColumn; + ip->lineNumber = state->lineNumber; + ip->lineLength = state->lineLength; +} + +/******************************************************************************/ +/* + * Free a saved input state + */ + +void ejsLexFreeInputState(Ejs *ep, EjsInput *state) +{ + int i; + + mprAssert(ep); + mprAssert(state); + + for (i = 0; i < EJS_TOKEN_STACK; i++) { + mprFree(state->putBack[i].token); + } + state->putBackIndex = -1; + mprFree(state->line); + state->lineLength = 0; + state->lineColumn = 0; +} + +/******************************************************************************/ +/* + * Get the next EJS token + */ + +int ejsLexGetToken(Ejs *ep, int state) +{ + mprAssert(ep); + + ep->tid = getLexicalToken(ep, state); + return ep->tid; +} + +/******************************************************************************/ + +/* + * Check for reserved words "if", "else", "var", "for", "foreach", + * "delete", "function", and "return". "new", "in" and "function" + * done below. "true", "false", "null", "undefined" are handled + * as global objects. + * + * Other reserved words not supported: + * "break", "case", "catch", "continue", "default", "do", + * "finally", "instanceof", "switch", "this", "throw", "try", + * "typeof", "while", "with" + * + * ECMA extensions reserved words (not supported): + * "abstract", "boolean", "byte", "char", "class", "const", + * "debugger", "double", "enum", "export", "extends", + * "final", "float", "goto", "implements", "import", "int", + * "interface", "long", "native", "package", "private", + * "protected", "public", "short", "static", "super", + * "synchronized", "throws", "transient", "volatile" + */ + +static int checkReservedWord(Ejs *ep, int state, int c, int tid) +{ + if (state == EJS_STATE_STMT) { + if (strcmp(ep->token, "if") == 0) { + inputPutback(ep, c); + return EJS_TOK_IF; + } else if (strcmp(ep->token, "else") == 0) { + inputPutback(ep, c); + return EJS_TOK_ELSE; + } else if (strcmp(ep->token, "var") == 0) { + inputPutback(ep, c); + return EJS_TOK_VAR; + } else if (strcmp(ep->token, "for") == 0) { + inputPutback(ep, c); + return EJS_TOK_FOR; + } else if (strcmp(ep->token, "delete") == 0) { + inputPutback(ep, c); + return EJS_TOK_DELETE; + } else if (strcmp(ep->token, "function") == 0) { + inputPutback(ep, c); + return EJS_TOK_FUNCTION; + } else if (strcmp(ep->token, "return") == 0) { + if ((c == ';') || (c == '(')) { + inputPutback(ep, c); + } + return EJS_TOK_RETURN; + } + } else if (state == EJS_STATE_EXPR) { + if (strcmp(ep->token, "new") == 0) { + inputPutback(ep, c); + return EJS_TOK_NEW; + } else if (strcmp(ep->token, "in") == 0) { + inputPutback(ep, c); + return EJS_TOK_IN; + } else if (strcmp(ep->token, "function") == 0) { + inputPutback(ep, c); + return EJS_TOK_FUNCTION; + } + } + return tid; +} + +/******************************************************************************/ +/* + * Get the next EJS token + */ + +static int getLexicalToken(Ejs *ep, int state) +{ + MprType type; + EjsInput *ip; + int done, tid, c, quote, style, idx; + + mprAssert(ep); + ip = ep->input; + mprAssert(ip); + + ep->tid = -1; + tid = -1; + type = BLD_FEATURE_NUM_TYPE_ID; + + /* + * Use a putback tokens first. Don't free strings as caller needs access. + */ + if (ip->putBackIndex >= 0) { + idx = ip->putBackIndex; + tid = ip->putBack[idx].id; + ep->token = (char*) ip->putBack[idx].token; + tid = checkReservedWord(ep, state, 0, tid); + ip->putBackIndex--; + return tid; + } + ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf; + *ip->tokServp = '\0'; + + if ((c = inputGetc(ep)) < 0) { + return EJS_TOK_EOF; + } + + /* + * Main lexical analyser + */ + for (done = 0; !done; ) { + switch (c) { + case -1: + return EJS_TOK_EOF; + + case ' ': + case '\t': + case '\r': + do { + if ((c = inputGetc(ep)) < 0) + break; + } while (c == ' ' || c == '\t' || c == '\r'); + break; + + case '\n': + return EJS_TOK_NEWLINE; + + case '(': + tokenAddChar(ep, c); + return EJS_TOK_LPAREN; + + case ')': + tokenAddChar(ep, c); + return EJS_TOK_RPAREN; + + case '[': + tokenAddChar(ep, c); + return EJS_TOK_LBRACKET; + + case ']': + tokenAddChar(ep, c); + return EJS_TOK_RBRACKET; + + case '.': + tokenAddChar(ep, c); + return EJS_TOK_PERIOD; + + case '{': + tokenAddChar(ep, c); + return EJS_TOK_LBRACE; + + case '}': + tokenAddChar(ep, c); + return EJS_TOK_RBRACE; + + case '+': + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c != '+' ) { + inputPutback(ep, c); + tokenAddChar(ep, EJS_EXPR_PLUS); + return EJS_TOK_EXPR; + } + tokenAddChar(ep, EJS_EXPR_INC); + return EJS_TOK_INC_DEC; + + case '-': + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c != '-' ) { + inputPutback(ep, c); + tokenAddChar(ep, EJS_EXPR_MINUS); + return EJS_TOK_EXPR; + } + tokenAddChar(ep, EJS_EXPR_DEC); + return EJS_TOK_INC_DEC; + + case '*': + tokenAddChar(ep, EJS_EXPR_MUL); + return EJS_TOK_EXPR; + + case '%': + tokenAddChar(ep, EJS_EXPR_MOD); + return EJS_TOK_EXPR; + + case '/': + /* + * Handle the division operator and comments + */ + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c != '*' && c != '/') { + inputPutback(ep, c); + tokenAddChar(ep, EJS_EXPR_DIV); + return EJS_TOK_EXPR; + } + style = c; + /* + * Eat comments. Both C and C++ comment styles are supported. + */ + while (1) { + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c == '\n' && style == '/') { + break; + } else if (c == '*') { + c = inputGetc(ep); + if (style == '/') { + if (c == '\n') { + break; + } + } else { + if (c == '/') { + break; + } + } + } + } + /* + * Continue looking for a token, so get the next character + */ + if ((c = inputGetc(ep)) < 0) { + return EJS_TOK_EOF; + } + break; + + case '<': /* < and <= */ + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c == '<') { + tokenAddChar(ep, EJS_EXPR_LSHIFT); + return EJS_TOK_EXPR; + } else if (c == '=') { + tokenAddChar(ep, EJS_EXPR_LESSEQ); + return EJS_TOK_EXPR; + } + tokenAddChar(ep, EJS_EXPR_LESS); + inputPutback(ep, c); + return EJS_TOK_EXPR; + + case '>': /* > and >= */ + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c == '>') { + tokenAddChar(ep, EJS_EXPR_RSHIFT); + return EJS_TOK_EXPR; + } else if (c == '=') { + tokenAddChar(ep, EJS_EXPR_GREATEREQ); + return EJS_TOK_EXPR; + } + tokenAddChar(ep, EJS_EXPR_GREATER); + inputPutback(ep, c); + return EJS_TOK_EXPR; + + case '=': /* "==" */ + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c == '=') { + tokenAddChar(ep, EJS_EXPR_EQ); + return EJS_TOK_EXPR; + } + inputPutback(ep, c); + return EJS_TOK_ASSIGNMENT; + + case '!': /* "!=" or "!"*/ + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + if (c == '=') { + tokenAddChar(ep, EJS_EXPR_NOTEQ); + return EJS_TOK_EXPR; + } + inputPutback(ep, c); + tokenAddChar(ep, EJS_EXPR_BOOL_COMP); + return EJS_TOK_EXPR; + + case ';': + tokenAddChar(ep, c); + return EJS_TOK_SEMI; + + case ',': + tokenAddChar(ep, c); + return EJS_TOK_COMMA; + + case '|': /* "||" */ + if ((c = inputGetc(ep)) < 0 || c != '|') { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + tokenAddChar(ep, EJS_COND_OR); + return EJS_TOK_LOGICAL; + + case '&': /* "&&" */ + if ((c = inputGetc(ep)) < 0 || c != '&') { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + tokenAddChar(ep, EJS_COND_AND); + return EJS_TOK_LOGICAL; + + case '\"': /* String quote */ + case '\'': + quote = c; + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Syntax Error"); + return EJS_TOK_ERR; + } + + while (c != quote) { + /* + * Check for escape sequence characters + */ + if (c == '\\') { + c = inputGetc(ep); + + if (isdigit(c)) { + /* + * Octal support, \101 maps to 65 = 'A'. Put first + * char back so converter will work properly. + */ + inputPutback(ep, c); + c = charConvert(ep, 8, 3); + + } else { + switch (c) { + case 'n': + c = '\n'; break; + case 'b': + c = '\b'; break; + case 'f': + c = '\f'; break; + case 'r': + c = '\r'; break; + case 't': + c = '\t'; break; + case 'x': + /* + * Hex support, \x41 maps to 65 = 'A' + */ + c = charConvert(ep, 16, 2); + break; + case 'u': + /* + * Unicode support, \x0401 maps to 65 = 'A' + */ + c = charConvert(ep, 16, 2); + c = c*16 + charConvert(ep, 16, 2); + + break; + case '\'': + case '\"': + case '\\': + break; + default: + ejsError(ep, "Invalid Escape Sequence"); + return EJS_TOK_ERR; + } + } + if (tokenAddChar(ep, c) < 0) { + return EJS_TOK_ERR; + } + } else { + if (tokenAddChar(ep, c) < 0) { + return EJS_TOK_ERR; + } + } + if ((c = inputGetc(ep)) < 0) { + ejsError(ep, "Unmatched Quote"); + return EJS_TOK_ERR; + } + } + return EJS_TOK_LITERAL; + + case '0': + if (tokenAddChar(ep, c) < 0) { + return EJS_TOK_ERR; + } + if ((c = inputGetc(ep)) < 0) { + break; + } + if (tolower(c) == 'x') { + if (tokenAddChar(ep, c) < 0) { + return EJS_TOK_ERR; + } + if ((c = inputGetc(ep)) < 0) { + break; + } + } + if (! isdigit(c)) { +#if BLD_FEATURE_FLOATING_POINT + if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') { + /* Fall through */ + type = MPR_TYPE_FLOAT; + } else +#endif + { + mprDestroyVar(&ep->tokenNumber); + ep->tokenNumber = mprParseVar(ep->token, type); + inputPutback(ep, c); + return EJS_TOK_NUMBER; + } + } + /* Fall through to get more digits */ + + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + do { + if (tokenAddChar(ep, c) < 0) { + return EJS_TOK_ERR; + } + if ((c = inputGetc(ep)) < 0) { + break; + } +#if BLD_FEATURE_FLOATING_POINT + if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') { + type = MPR_TYPE_FLOAT; + } + } while (isdigit(c) || c == '.' || tolower(c) == 'e' || + c == '+' || c == '-'); +#else + } while (isdigit(c)); +#endif + + mprDestroyVar(&ep->tokenNumber); + ep->tokenNumber = mprParseVar(ep->token, type); + inputPutback(ep, c); + return EJS_TOK_NUMBER; + + default: + /* + * Identifiers or a function names + */ + while (1) { + if (c == '\\') { + if ((c = inputGetc(ep)) < 0) { + break; + } + if (c == '\n' || c == '\r') { + break; + } + } else if (tokenAddChar(ep, c) < 0) { + break; + } + if ((c = inputGetc(ep)) < 0) { + break; + } + if (!isalnum(c) && c != '$' && c != '_' && c != '\\') { + break; + } + } + if (*ep->token == '\0') { + c = inputGetc(ep); + break; + } + if (! isalpha((int) *ep->token) && *ep->token != '$' && + *ep->token != '_') { + ejsError(ep, "Invalid identifier %s", ep->token); + return EJS_TOK_ERR; + } + + tid = checkReservedWord(ep, state, c, EJS_TOK_ID); + if (tid != EJS_TOK_ID) { + return tid; + } + + /* + * Skip white space after token to find out whether this is + * a function or not. + */ + while (c == ' ' || c == '\t' || c == '\r' || c == '\n') { + if ((c = inputGetc(ep)) < 0) + break; + } + + tid = EJS_TOK_ID; + done++; + } + } + + /* + * Putback the last extra character for next time + */ + inputPutback(ep, c); + return tid; +} + +/******************************************************************************/ +/* + * Convert a hex or octal character back to binary, return original char if + * not a hex digit + */ + +static int charConvert(Ejs *ep, int base, int maxDig) +{ + int i, c, lval, convChar; + + lval = 0; + for (i = 0; i < maxDig; i++) { + if ((c = inputGetc(ep)) < 0) { + break; + } + /* + * Initialize to out of range value + */ + convChar = base; + if (isdigit(c)) { + convChar = c - '0'; + } else if (c >= 'a' && c <= 'f') { + convChar = c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + convChar = c - 'A' + 10; + } + /* + * If unexpected character then return it to buffer. + */ + if (convChar >= base) { + inputPutback(ep, c); + break; + } + lval = (lval * base) + convChar; + } + return lval; +} + +/******************************************************************************/ +/* + * Putback the last token read. Accept at most one push back token. + */ + +void ejsLexPutbackToken(Ejs *ep, int tid, char *string) +{ + EjsInput *ip; + int idx; + + mprAssert(ep); + ip = ep->input; + mprAssert(ip); + + ip->putBackIndex += 1; + idx = ip->putBackIndex; + ip->putBack[idx].id = tid; + + if (ip->putBack[idx].token) { + if (ip->putBack[idx].token == string) { + return; + } + mprFree(ip->putBack[idx].token); + } + ip->putBack[idx].token = mprStrdup(string); +} + +/******************************************************************************/ +/* + * Add a character to the token buffer + */ + +static int tokenAddChar(Ejs *ep, int c) +{ + EjsInput *ip; + uchar *oldbuf; + + mprAssert(ep); + ip = ep->input; + mprAssert(ip); + + if (ip->tokEndp >= &ip->tokbuf[ip->tokSize - 1]) { + ip->tokSize += EJS_PARSE_INCR; + oldbuf = ip->tokbuf; + ip->tokbuf = mprRealloc(ip->tokbuf, ip->tokSize); + if (ip->tokbuf == 0) { + ejsError(ep, "Token too big"); + return -1; + } + ip->tokEndp += (int) ((uchar*) ip->tokbuf - oldbuf); + ip->tokServp += (int) ((uchar*) ip->tokbuf - oldbuf); + ep->token += (int) ((uchar*) ip->tokbuf - oldbuf); + } + *ip->tokEndp++ = c; + *ip->tokEndp = '\0'; + + return 0; +} + +/******************************************************************************/ +/* + * Get another input character + */ + +static int inputGetc(Ejs *ep) +{ + EjsInput *ip; + int c; + + mprAssert(ep); + ip = ep->input; + + if (ip->scriptSize <= 0) { + return -1; + } + + c = (uchar) (*ip->scriptServp++); + ip->scriptSize--; + + /* + * For debugging, accumulate the line number and the currenly parsed line + */ + if (c == '\n') { +#if BLD_DEBUG && 0 + if (ip->lineColumn > 0) { + printf("PARSED: %s\n", ip->line); + } +#endif + ip->lineNumber++; + ip->lineColumn = 0; + } else { + if ((ip->lineColumn + 2) >= ip->lineLength) { + ip->lineLength += 80; + ip->line = mprRealloc(ip->line, ip->lineLength * sizeof(char)); + } + ip->line[ip->lineColumn++] = c; + ip->line[ip->lineColumn] = '\0'; + } + return c; +} + +/******************************************************************************/ +/* + * Putback a character onto the input queue + */ + +static void inputPutback(Ejs *ep, int c) +{ + EjsInput *ip; + + mprAssert(ep); + + if (c != 0) { + ip = ep->input; + *--ip->scriptServp = c; + ip->scriptSize++; + ip->lineColumn--; + ip->line[ip->lineColumn] = '\0'; + } +} + +/******************************************************************************/ + +#else +void ejsLexDummy() {} + +/******************************************************************************/ +#endif /* BLD_FEATURE_EJS */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim:tw=78 + * vim600: sw=4 ts=4 fdm=marker + * vim<600: sw=4 ts=4 + */ -- cgit