diff options
Diffstat (limited to 'source4/lib/appweb/ejs/ejsLex.c')
-rw-r--r-- | source4/lib/appweb/ejs/ejsLex.c | 923 |
1 files changed, 0 insertions, 923 deletions
diff --git a/source4/lib/appweb/ejs/ejsLex.c b/source4/lib/appweb/ejs/ejsLex.c deleted file mode 100644 index b9a363cfc9..0000000000 --- a/source4/lib/appweb/ejs/ejsLex.c +++ /dev/null @@ -1,923 +0,0 @@ -/* - * @file ejsLex.c - * @brief EJS Lexical Analyser - * @overview EJS lexical analyser. This implementes a lexical analyser - * for a subset of the JavaScript language. - */ -/********************************* Copyright **********************************/ -/* - * @copy default.g - * - * Copyright (c) Mbedthis Software LLC, 2003-2005. All Rights Reserved. - * Portions Copyright (c) GoAhead Software, 1995-2000. All Rights Reserved. - * - * This software is distributed under commercial and open source licenses. - * You may use the GPL open source license described below or you may acquire - * a commercial license from Mbedthis Software. You agree to be fully bound - * by the terms of either license. Consult the LICENSE.TXT distributed with - * this software for full details. - * - * This software is open source; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. See the GNU General Public License for more - * details at: http://www.mbedthis.com/downloads/gplLicense.html - * - * This program is distributed WITHOUT ANY WARRANTY; without even the - * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * This GPL license does NOT permit incorporating this software into - * proprietary programs. If you are unable to comply with the GPL, you must - * acquire a commercial license to use this software. Commercial licenses - * for this software and support services are available from Mbedthis - * Software at http://www.mbedthis.com - * - * @end - */ -/********************************** Includes **********************************/ - -#include "ejsInternal.h" - -#if BLD_FEATURE_EJS - -/****************************** Forward Declarations **************************/ - -static int getLexicalToken(Ejs *ep, int state); -static int tokenAddChar(Ejs *ep, int c); -static int inputGetc(Ejs *ep); -static void inputPutback(Ejs *ep, int c); -static int charConvert(Ejs *ep, int base, int maxDig); - -/************************************* Code ***********************************/ -/* - * Open a new input script - */ - -int ejsLexOpenScript(Ejs *ep, char *script) -{ - EjsInput *ip; - - mprAssert(ep); - mprAssert(script); - - if ((ip = mprMalloc(sizeof(EjsInput))) == NULL) { - return -1; - } - memset(ip, 0, sizeof(*ip)); - ip->next = ep->input; - ep->input = ip; - ip->procName = ep->proc?ep->proc->procName:NULL; - -/* - * Create the parse token buffer and script buffer - */ - ip->tokbuf = mprMalloc(EJS_PARSE_INCR); - ip->tokSize = EJS_PARSE_INCR; - ip->tokServp = ip->tokbuf; - ip->tokEndp = ip->tokbuf; - - ip->script = mprStrdup(script); - ip->scriptSize = strlen(script); - ip->scriptServp = ip->script; - - ip->lineNumber = 1; - ip->lineLength = 0; - ip->lineColumn = 0; - ip->line = NULL; - - ip->putBackIndex = -1; - - return 0; -} - -/******************************************************************************/ -/* - * Close the input script - */ - -void ejsLexCloseScript(Ejs *ep) -{ - EjsInput *ip; - int i; - - mprAssert(ep); - - ip = ep->input; - mprAssert(ip); - ep->input = ip->next; - - for (i = 0; i < EJS_TOKEN_STACK; i++) { - mprFree(ip->putBack[i].token); - ip->putBack[i].token = 0; - } - - mprFree(ip->line); - mprFree(ip->tokbuf); - mprFree(ip->script); - - mprFree(ip); -} - -/******************************************************************************/ -/* - * Initialize an input state structure - */ - -int ejsInitInputState(EjsInput *ip) -{ - mprAssert(ip); - - memset(ip, 0, sizeof(*ip)); - ip->putBackIndex = -1; - - return 0; -} -/******************************************************************************/ -/* - * Save the input state - */ - -void ejsLexSaveInputState(Ejs *ep, EjsInput *state) -{ - EjsInput *ip; - int i; - - mprAssert(ep); - - ip = ep->input; - mprAssert(ip); - - *state = *ip; - - for (i = 0; i < ip->putBackIndex; i++) { - state->putBack[i].token = mprStrdup(ip->putBack[i].token); - state->putBack[i].id = ip->putBack[i].id; - } - for (; i < EJS_TOKEN_STACK; i++) { - state->putBack[i].token = 0; - } - - state->line = mprMalloc(ip->lineLength); - mprStrcpy(state->line, ip->lineLength, ip->line); - - state->lineColumn = ip->lineColumn; - state->lineNumber = ip->lineNumber; - state->lineLength = ip->lineLength; -} - -/******************************************************************************/ -/* - * Restore the input state - */ - -void ejsLexRestoreInputState(Ejs *ep, EjsInput *state) -{ - EjsInput *ip; - int i; - - mprAssert(ep); - mprAssert(state); - - ip = ep->input; - mprAssert(ip); - - ip->tokbuf = state->tokbuf; - ip->tokServp = state->tokServp; - ip->tokEndp = state->tokEndp; - ip->tokSize = state->tokSize; - - ip->script = state->script; - ip->scriptServp = state->scriptServp; - ip->scriptSize = state->scriptSize; - - ip->putBackIndex = state->putBackIndex; - for (i = 0; i < ip->putBackIndex; i++) { - mprFree(ip->putBack[i].token); - ip->putBack[i].id = state->putBack[i].id; - ip->putBack[i].token = mprStrdup(state->putBack[i].token); - } - - mprFree(ip->line); - ip->line = mprMalloc(state->lineLength); - mprStrcpy(ip->line, state->lineLength, state->line); - - ip->lineColumn = state->lineColumn; - ip->lineNumber = state->lineNumber; - ip->lineLength = state->lineLength; -} - -/******************************************************************************/ -/* - * Free a saved input state - */ - -void ejsLexFreeInputState(Ejs *ep, EjsInput *state) -{ - int i; - - mprAssert(ep); - mprAssert(state); - - for (i = 0; i < EJS_TOKEN_STACK; i++) { - mprFree(state->putBack[i].token); - } - state->putBackIndex = -1; - mprFree(state->line); - state->lineLength = 0; - state->lineColumn = 0; -} - -/******************************************************************************/ -/* - * Get the next EJS token - */ - -int ejsLexGetToken(Ejs *ep, int state) -{ - mprAssert(ep); - - ep->tid = getLexicalToken(ep, state); - return ep->tid; -} - -/******************************************************************************/ - -/* - * Check for reserved words "if", "else", "var", "for", "foreach", - * "delete", "function", and "return". "new", "in" and "function" - * done below. "true", "false", "null", "undefined" are handled - * as global objects. - * - * Other reserved words not supported: - * "break", "case", "catch", "continue", "default", "do", - * "finally", "instanceof", "switch", "this", "throw", "try", - * "typeof", "while", "with" - * - * ECMA extensions reserved words (not supported): - * "abstract", "boolean", "byte", "char", "class", "const", - * "debugger", "double", "enum", "export", "extends", - * "final", "float", "goto", "implements", "import", "int", - * "interface", "long", "native", "package", "private", - * "protected", "public", "short", "static", "super", - * "synchronized", "throws", "transient", "volatile" - */ - -static int checkReservedWord(Ejs *ep, int state, int c, int tid) -{ - if (state == EJS_STATE_STMT) { - if (strcmp(ep->token, "if") == 0) { - inputPutback(ep, c); - return EJS_TOK_IF; - } else if (strcmp(ep->token, "else") == 0) { - inputPutback(ep, c); - return EJS_TOK_ELSE; - } else if (strcmp(ep->token, "var") == 0) { - inputPutback(ep, c); - return EJS_TOK_VAR; - } else if (strcmp(ep->token, "for") == 0) { - inputPutback(ep, c); - return EJS_TOK_FOR; - } else if (strcmp(ep->token, "delete") == 0) { - inputPutback(ep, c); - return EJS_TOK_DELETE; - } else if (strcmp(ep->token, "function") == 0) { - inputPutback(ep, c); - return EJS_TOK_FUNCTION; - } else if (strcmp(ep->token, "return") == 0) { - if ((c == ';') || (c == '(')) { - inputPutback(ep, c); - } - return EJS_TOK_RETURN; - } - } else if (state == EJS_STATE_EXPR) { - if (strcmp(ep->token, "new") == 0) { - inputPutback(ep, c); - return EJS_TOK_NEW; - } else if (strcmp(ep->token, "in") == 0) { - inputPutback(ep, c); - return EJS_TOK_IN; - } else if (strcmp(ep->token, "function") == 0) { - inputPutback(ep, c); - return EJS_TOK_FUNCTION; - } - } - return tid; -} - -/******************************************************************************/ -/* - * Get the next EJS token - */ - -static int getLexicalToken(Ejs *ep, int state) -{ - MprType type; - EjsInput *ip; - int done, tid, c, quote, style, idx; - - mprAssert(ep); - ip = ep->input; - mprAssert(ip); - - ep->tid = -1; - tid = -1; - type = BLD_FEATURE_NUM_TYPE_ID; - - /* - * Use a putback tokens first. Don't free strings as caller needs access. - */ - if (ip->putBackIndex >= 0) { - idx = ip->putBackIndex; - tid = ip->putBack[idx].id; - ep->token = (char*) ip->putBack[idx].token; - tid = checkReservedWord(ep, state, 0, tid); - ip->putBackIndex--; - return tid; - } - ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf; - *ip->tokServp = '\0'; - - if ((c = inputGetc(ep)) < 0) { - return EJS_TOK_EOF; - } - - /* - * Main lexical analyser - */ - for (done = 0; !done; ) { - switch (c) { - case -1: - return EJS_TOK_EOF; - - case ' ': - case '\t': - case '\r': - do { - if ((c = inputGetc(ep)) < 0) - break; - } while (c == ' ' || c == '\t' || c == '\r'); - break; - - case '\n': - return EJS_TOK_NEWLINE; - - case '(': - tokenAddChar(ep, c); - return EJS_TOK_LPAREN; - - case ')': - tokenAddChar(ep, c); - return EJS_TOK_RPAREN; - - case '[': - tokenAddChar(ep, c); - return EJS_TOK_LBRACKET; - - case ']': - tokenAddChar(ep, c); - return EJS_TOK_RBRACKET; - - case '.': - tokenAddChar(ep, c); - return EJS_TOK_PERIOD; - - case '{': - tokenAddChar(ep, c); - return EJS_TOK_LBRACE; - - case '}': - tokenAddChar(ep, c); - return EJS_TOK_RBRACE; - - case '+': - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c != '+' ) { - inputPutback(ep, c); - tokenAddChar(ep, EJS_EXPR_PLUS); - return EJS_TOK_EXPR; - } - tokenAddChar(ep, EJS_EXPR_INC); - return EJS_TOK_INC_DEC; - - case '-': - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c != '-' ) { - inputPutback(ep, c); - tokenAddChar(ep, EJS_EXPR_MINUS); - return EJS_TOK_EXPR; - } - tokenAddChar(ep, EJS_EXPR_DEC); - return EJS_TOK_INC_DEC; - - case '*': - tokenAddChar(ep, EJS_EXPR_MUL); - return EJS_TOK_EXPR; - - case '%': - tokenAddChar(ep, EJS_EXPR_MOD); - return EJS_TOK_EXPR; - - case '/': - /* - * Handle the division operator and comments - */ - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c != '*' && c != '/') { - inputPutback(ep, c); - tokenAddChar(ep, EJS_EXPR_DIV); - return EJS_TOK_EXPR; - } - style = c; - /* - * Eat comments. Both C and C++ comment styles are supported. - */ - while (1) { - if ((c = inputGetc(ep)) < 0) { - if (style == '/') { - return EJS_TOK_EOF; - } - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c == '\n' && style == '/') { - break; - } else if (c == '*') { - c = inputGetc(ep); - if (style == '/') { - if (c == '\n') { - break; - } - } else { - if (c == '/') { - break; - } - } - } - } - /* - * Continue looking for a token, so get the next character - */ - if ((c = inputGetc(ep)) < 0) { - return EJS_TOK_EOF; - } - break; - - case '<': /* < and <= */ - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c == '<') { - tokenAddChar(ep, EJS_EXPR_LSHIFT); - return EJS_TOK_EXPR; - } else if (c == '=') { - tokenAddChar(ep, EJS_EXPR_LESSEQ); - return EJS_TOK_EXPR; - } - tokenAddChar(ep, EJS_EXPR_LESS); - inputPutback(ep, c); - return EJS_TOK_EXPR; - - case '>': /* > and >= */ - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c == '>') { - tokenAddChar(ep, EJS_EXPR_RSHIFT); - return EJS_TOK_EXPR; - } else if (c == '=') { - tokenAddChar(ep, EJS_EXPR_GREATEREQ); - return EJS_TOK_EXPR; - } - tokenAddChar(ep, EJS_EXPR_GREATER); - inputPutback(ep, c); - return EJS_TOK_EXPR; - - case '=': /* "==" */ - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c == '=') { - tokenAddChar(ep, EJS_EXPR_EQ); - return EJS_TOK_EXPR; - } - inputPutback(ep, c); - return EJS_TOK_ASSIGNMENT; - - case '!': /* "!=" or "!"*/ - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - if (c == '=') { - tokenAddChar(ep, EJS_EXPR_NOTEQ); - return EJS_TOK_EXPR; - } - inputPutback(ep, c); - tokenAddChar(ep, EJS_EXPR_BOOL_COMP); - return EJS_TOK_EXPR; - - case ';': - tokenAddChar(ep, c); - return EJS_TOK_SEMI; - - case ',': - tokenAddChar(ep, c); - return EJS_TOK_COMMA; - - case '|': /* "||" */ - if ((c = inputGetc(ep)) < 0 || c != '|') { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - tokenAddChar(ep, EJS_COND_OR); - return EJS_TOK_LOGICAL; - - case '&': /* "&&" */ - if ((c = inputGetc(ep)) < 0 || c != '&') { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - tokenAddChar(ep, EJS_COND_AND); - return EJS_TOK_LOGICAL; - - case '\"': /* String quote */ - case '\'': - quote = c; - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Syntax Error"); - return EJS_TOK_ERR; - } - - while (c != quote) { - /* - * Check for escape sequence characters - */ - if (c == '\\') { - c = inputGetc(ep); - - if (isdigit(c)) { - /* - * Octal support, \101 maps to 65 = 'A'. Put first - * char back so converter will work properly. - */ - inputPutback(ep, c); - c = charConvert(ep, 8, 3); - - } else { - switch (c) { - case 'n': - c = '\n'; break; - case 'b': - c = '\b'; break; - case 'f': - c = '\f'; break; - case 'r': - c = '\r'; break; - case 't': - c = '\t'; break; - case 'x': - /* - * Hex support, \x41 maps to 65 = 'A' - */ - c = charConvert(ep, 16, 2); - break; - case 'u': - /* - * Unicode support, \x0401 maps to 65 = 'A' - */ - c = charConvert(ep, 16, 2); - c = c*16 + charConvert(ep, 16, 2); - - break; - case '\'': - case '\"': - case '\\': - break; - default: - ejsError(ep, "Invalid Escape Sequence"); - return EJS_TOK_ERR; - } - } - if (tokenAddChar(ep, c) < 0) { - return EJS_TOK_ERR; - } - } else { - if (tokenAddChar(ep, c) < 0) { - return EJS_TOK_ERR; - } - } - if ((c = inputGetc(ep)) < 0) { - ejsError(ep, "Unmatched Quote"); - return EJS_TOK_ERR; - } - } - return EJS_TOK_LITERAL; - - case '0': - if (tokenAddChar(ep, c) < 0) { - return EJS_TOK_ERR; - } - if ((c = inputGetc(ep)) < 0) { - break; - } - if (tolower(c) == 'x') { - do { - if (tokenAddChar(ep, c) < 0) { - return EJS_TOK_ERR; - } - if ((c = inputGetc(ep)) < 0) { - break; - } - } while (isdigit(c) || (tolower(c) >= 'a' && tolower(c) <= 'f')); - - mprDestroyVar(&ep->tokenNumber); - ep->tokenNumber = mprParseVar(ep->token, type); - inputPutback(ep, c); - return EJS_TOK_NUMBER; - } - if (! isdigit(c)) { -#if BLD_FEATURE_FLOATING_POINT - if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') { - /* Fall through */ - type = MPR_TYPE_FLOAT; - } else -#endif - { - mprDestroyVar(&ep->tokenNumber); - ep->tokenNumber = mprParseVar(ep->token, type); - inputPutback(ep, c); - return EJS_TOK_NUMBER; - } - } - /* Fall through to get more digits */ - - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - do { - if (tokenAddChar(ep, c) < 0) { - return EJS_TOK_ERR; - } - if ((c = inputGetc(ep)) < 0) { - break; - } -#if BLD_FEATURE_FLOATING_POINT - if (c == '.' || tolower(c) == 'e' || tolower(c) == 'f') { - type = MPR_TYPE_FLOAT; - } - } while (isdigit(c) || c == '.' || tolower(c) == 'e' || tolower(c) == 'f' || - ((type == MPR_TYPE_FLOAT) && (c == '+' || c == '-'))); -#else - } while (isdigit(c)); -#endif - - mprDestroyVar(&ep->tokenNumber); - ep->tokenNumber = mprParseVar(ep->token, type); - inputPutback(ep, c); - return EJS_TOK_NUMBER; - - default: - /* - * Identifiers or a function names - */ - while (1) { - if (c == '\\') { - if ((c = inputGetc(ep)) < 0) { - break; - } - if (c == '\n' || c == '\r') { - break; - } - } else if (tokenAddChar(ep, c) < 0) { - break; - } - if ((c = inputGetc(ep)) < 0) { - break; - } - if (!isalnum(c) && c != '$' && c != '_' && c != '\\') { - break; - } - } - if (*ep->token == '\0') { - c = inputGetc(ep); - break; - } - if (! isalpha((int) *ep->token) && *ep->token != '$' && - *ep->token != '_') { - ejsError(ep, "Invalid identifier %s", ep->token); - return EJS_TOK_ERR; - } - - tid = checkReservedWord(ep, state, c, EJS_TOK_ID); - if (tid != EJS_TOK_ID) { - return tid; - } - - /* - * Skip white space after token to find out whether this is - * a function or not. - */ - while (c == ' ' || c == '\t' || c == '\r' || c == '\n') { - if ((c = inputGetc(ep)) < 0) - break; - } - - tid = EJS_TOK_ID; - done++; - } - } - - /* - * Putback the last extra character for next time - */ - inputPutback(ep, c); - return tid; -} - -/******************************************************************************/ -/* - * Convert a hex or octal character back to binary, return original char if - * not a hex digit - */ - -static int charConvert(Ejs *ep, int base, int maxDig) -{ - int i, c, lval, convChar; - - lval = 0; - for (i = 0; i < maxDig; i++) { - if ((c = inputGetc(ep)) < 0) { - break; - } - /* - * Initialize to out of range value - */ - convChar = base; - if (isdigit(c)) { - convChar = c - '0'; - } else if (c >= 'a' && c <= 'f') { - convChar = c - 'a' + 10; - } else if (c >= 'A' && c <= 'F') { - convChar = c - 'A' + 10; - } - /* - * If unexpected character then return it to buffer. - */ - if (convChar >= base) { - inputPutback(ep, c); - break; - } - lval = (lval * base) + convChar; - } - return lval; -} - -/******************************************************************************/ -/* - * Putback the last token read. Accept at most one push back token. - */ - -void ejsLexPutbackToken(Ejs *ep, int tid, char *string) -{ - EjsInput *ip; - int idx; - - mprAssert(ep); - ip = ep->input; - mprAssert(ip); - - ip->putBackIndex += 1; - idx = ip->putBackIndex; - ip->putBack[idx].id = tid; - - if (ip->putBack[idx].token) { - if (ip->putBack[idx].token == string) { - return; - } - mprFree(ip->putBack[idx].token); - } - ip->putBack[idx].token = mprStrdup(string); -} - -/******************************************************************************/ -/* - * Add a character to the token buffer - */ - -static int tokenAddChar(Ejs *ep, int c) -{ - EjsInput *ip; - uchar *oldbuf; - - mprAssert(ep); - ip = ep->input; - mprAssert(ip); - - if (ip->tokEndp >= &ip->tokbuf[ip->tokSize - 1]) { - ip->tokSize += EJS_PARSE_INCR; - oldbuf = ip->tokbuf; - ip->tokbuf = mprRealloc(ip->tokbuf, ip->tokSize); - if (ip->tokbuf == 0) { - ejsError(ep, "Token too big"); - return -1; - } - ip->tokEndp += (int) ((uchar*) ip->tokbuf - oldbuf); - ip->tokServp += (int) ((uchar*) ip->tokbuf - oldbuf); - ep->token += (int) ((uchar*) ip->tokbuf - oldbuf); - } - *ip->tokEndp++ = c; - *ip->tokEndp = '\0'; - - return 0; -} - -/******************************************************************************/ -/* - * Get another input character - */ - -static int inputGetc(Ejs *ep) -{ - EjsInput *ip; - int c; - - mprAssert(ep); - ip = ep->input; - - if (ip->scriptSize <= 0) { - return -1; - } - - c = (uchar) (*ip->scriptServp++); - ip->scriptSize--; - - /* - * For debugging, accumulate the line number and the currenly parsed line - */ - if (c == '\n') { -#if BLD_DEBUG && 0 - if (ip->lineColumn > 0) { - printf("PARSED: %s\n", ip->line); - } -#endif - ip->lineNumber++; - ip->lineColumn = 0; - } else { - if ((ip->lineColumn + 2) >= ip->lineLength) { - ip->lineLength += 80; - ip->line = mprRealloc(ip->line, ip->lineLength * sizeof(char)); - } - ip->line[ip->lineColumn++] = c; - ip->line[ip->lineColumn] = '\0'; - } - return c; -} - -/******************************************************************************/ -/* - * Putback a character onto the input queue - */ - -static void inputPutback(Ejs *ep, int c) -{ - EjsInput *ip; - - mprAssert(ep); - - if (c != 0) { - ip = ep->input; - *--ip->scriptServp = c; - ip->scriptSize++; - ip->lineColumn--; - ip->line[ip->lineColumn] = '\0'; - } -} - -/******************************************************************************/ - -#else -void ejsLexDummy() {} - -/******************************************************************************/ -#endif /* BLD_FEATURE_EJS */ - -/* - * Local variables: - * tab-width: 4 - * c-basic-offset: 4 - * End: - * vim:tw=78 - * vim600: sw=4 ts=4 fdm=marker - * vim<600: sw=4 ts=4 - */ |