Diffstat (limited to 'webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py')
-rwxr-xr-x  webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py  349
1 file changed, 0 insertions(+), 349 deletions(-)
diff --git a/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py b/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py
deleted file mode 100755
index 2f8e40436b..0000000000
--- a/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py
+++ /dev/null
@@ -1,349 +0,0 @@
-#!/usr/bin/env python
-
-import sys, re, optparse
-import config, filetool, comment
-
-R_WHITESPACE = re.compile(r"(\s+)")
-R_NONWHITESPACE = re.compile(r"\S+")
-R_NUMBER = re.compile(r"^[0-9]+")
-R_NEWLINE = re.compile(r"(\n)")
-
-# Ideas from: http://www.regular-expressions.info/examplesprogrammer.html
-# Multicomment RegExp inspired by: http://ostermiller.org/findcomment.html
-
-# builds regexp strings
-S_STRING_A = "'[^'\\\n]*(\\.|\n[^'\\\n]*)*'"
-S_STRING_B = '"[^"\\\n]*(\\.|\n[^"\\\n]*)*"'
-
-S_FLOAT = "([0-9]+\.[0-9]+)"
-
-S_OPERATORS_2 = r"(==)|(!=)|(\+\+)|(--)|(-=)|(\+=)|(\*=)|(/=)|(%=)|(&&)|(\|\|)|(\>=)|(\<=)|(>>)|(<<)|(\^\|)|(\|=)|(\^=)|(&=)|(::)|(\.\.)"
-S_OPERATORS_3 = r"(===)|(!==)|(\<\<=)|(\>\>=)|(\>\>\>)"
-S_OPERATORS_4 = r"(\>\>\>=)"
-S_OPERATORS = "(" + S_OPERATORS_4 + "|" + S_OPERATORS_3 + "|" + S_OPERATORS_2 + ")"
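-# Longest operators come first: the alternation prefers ">>>=" over ">>=" over
-# ">>", since the regex engine takes the first alternative that matches.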
-
-S_REGEXP = "(\/[^\t\n\r\f\v\/]+?\/[mgi]*)"
-S_REGEXP_A = "\.(match|search|split)\s*\(\s*\(*\s*" + S_REGEXP + "\s*\)*\s*\)"
-S_REGEXP_B = "\.(replace)\s*\(\s*\(*\s*" + S_REGEXP + "\s*\)*\s*?,?"
-S_REGEXP_C = "\s*\(*\s*" + S_REGEXP + "\)*\.(test|exec)\s*\(\s*"
-S_REGEXP_D = "(:|=|\?)\s*\(*\s*" + S_REGEXP + "\s*\)*"
-S_REGEXP_ALL = S_REGEXP_A + "|" + S_REGEXP_B + "|" + S_REGEXP_C + "|" + S_REGEXP_D
-
-S_ALL = "(" + comment.S_BLOCK_COMMENT + "|" + comment.S_INLINE_COMMENT + "|" + S_STRING_A + "|" + S_STRING_B + "|" + S_REGEXP_ALL + "|" + S_FLOAT + "|" + S_OPERATORS + ")"
-
-# compile regexp strings
-R_STRING_A = re.compile("^" + S_STRING_A + "$")
-R_STRING_B = re.compile("^" + S_STRING_B + "$")
-R_FLOAT = re.compile("^" + S_FLOAT + "$")
-R_OPERATORS = re.compile(S_OPERATORS)
-R_REGEXP = re.compile(S_REGEXP)
-R_REGEXP_A = re.compile(S_REGEXP_A)
-R_REGEXP_B = re.compile(S_REGEXP_B)
-R_REGEXP_C = re.compile(S_REGEXP_C)
-R_REGEXP_D = re.compile(S_REGEXP_D)
-R_ALL = re.compile(S_ALL)
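-
-# Example (sketch): given a line like
-#   var s = "he said \"hi\""; // greet
-# R_ALL first matches the double-quoted string and then the inline comment,
-# so neither is ever broken apart by the whitespace/token pass in parsePart().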
-
-
-
-
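-# global tokenizer state; reset at the start of every parseStream() run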
-parseLine = 1
-parseColumn = 1
-parseUniqueId = ""
-
-
-
-def protectEscape(s):
-    return s.replace("\\\\", "__$ESCAPE0$__").replace("\\\"", "__$ESCAPE1$__").replace("\\'", "__$ESCAPE2$__").replace("\\/", "__$ESCAPE3$__").replace("\\!", "__$ESCAPE4$__")
-
-
-
-def recoverEscape(s):
-    return s.replace("__$ESCAPE0$__", "\\\\").replace("__$ESCAPE1$__", "\\\"").replace("__$ESCAPE2$__", "\\'").replace("__$ESCAPE3$__", "\\/").replace("__$ESCAPE4$__", "\\!")
-
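-# Round-trip sanity sketch: the two helpers are exact inverses, e.g.
-#   s = 'he said \\"hi\\"'
-#   recoverEscape(protectEscape(s)) == s   # -> True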
-
-
-def parseElement(element):
-    global parseUniqueId
-    global parseLine
-    global parseColumn
-
-    if element in config.JSPROTECTED:
-        # print "PROTECTED: %s" % config.JSPROTECTED[element]
-        obj = { "type" : "protected", "detail" : config.JSPROTECTED[element], "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
-
-    elif element in config.JSBUILTIN:
-        # print "BUILTIN: %s" % element
-        obj = { "type" : "builtin", "detail" : "", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
-
-    elif R_NUMBER.search(element):
-        # print "NUMBER: %s" % element
-        obj = { "type" : "number", "detail" : "int", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
-
-    elif element.startswith("_"):
-        # print "PRIVATE NAME: %s" % element
-        obj = { "type" : "name", "detail" : "private", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
-
-    elif len(element) > 0:
-        # print "PUBLIC NAME: %s" % element
-        obj = { "type" : "name", "detail" : "public", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
-
-    parseColumn += len(element)
-
-    return obj
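-
-# Examples (sketch; assumes "var" appears in config.JSPROTECTED):
-#   parseElement("var")    -> { "type" : "protected", ... }
-#   parseElement("_cache") -> { "type" : "name", "detail" : "private", ... }
-#   parseElement("42")     -> { "type" : "number", "detail" : "int", ... }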
-
-
-def parsePart(part):
-    global parseUniqueId
-    global parseLine
-    global parseColumn
-
-    tokens = []
-    element = ""
-
-    for line in R_NEWLINE.split(part):
-        if line == "\n":
-            tokens.append({ "type" : "eol", "source" : "", "detail" : "", "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId })
-            parseColumn = 1
-            parseLine += 1
-
-        else:
-            for item in R_WHITESPACE.split(line):
-                if item == "":
-                    continue
-
-                if not R_NONWHITESPACE.search(item):
-                    parseColumn += len(item)
-                    continue
-
-                # print "ITEM: '%s'" % item
-
-                for char in item:
-                    # work on single-character tokens, otherwise concat to a bigger element
-                    if char in config.JSTOKENS:
-                        # convert the pending element first
-                        if element != "":
-                            if R_NONWHITESPACE.search(element):
-                                tokens.append(parseElement(element))
-
-                            element = ""
-
-                        # add the character to the token list
-                        tokens.append({ "type" : "token", "detail" : config.JSTOKENS[char], "source" : char, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId })
-                        parseColumn += 1
-
-                    else:
-                        element += char
-
-                # convert the remaining element so adjacent items never merge
-                if element != "":
-                    if R_NONWHITESPACE.search(element):
-                        tokens.append(parseElement(element))
-
-                    element = ""
-
-    return tokens
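-
-# Example (sketch; assumes "=" and ";" are single-character entries in
-# config.JSTOKENS):
-#   parsePart("a = b;") -> name(a), token(=), name(b), token(;)
-# parseColumn advances over every item, including the skipped whitespace.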
-
-
-
-def parseFragmentLead(content, fragment, tokens):
-    pos = content.find(fragment)
-
-    if pos > 0:
-        tokens.extend(parsePart(recoverEscape(content[0:pos])))
-
-    return content[pos+len(fragment):]
-
-
-
-def hasLeadingContent(tokens):
-    # the current line already carries content when the last token seen is
-    # not an end-of-line token
-    return len(tokens) > 0 and tokens[-1]["type"] != "eol"
-
-
-
-
-
-def parseStream(content, uniqueId=""):
-    # make global variables available
-    global parseLine
-    global parseColumn
-    global parseUniqueId
-
-    # reset global state
-    parseColumn = 1
-    parseLine = 1
-    parseUniqueId = uniqueId
-
-    # prepare storage
-    tokens = []
-    content = protectEscape(content)
-
-    # print " * searching for patterns..."
-    fragments = R_ALL.findall(content)
-
-    # print " * structuring..."
-    for item in fragments:
-        fragment = item[0]
-
-        # print "Found: '%s'" % fragment
-
-        if comment.R_BLOCK_COMMENT.match(fragment):
-            source = recoverEscape(fragment)
-            format = comment.getFormat(source)
-            multiline = comment.isMultiLine(source)
-
-            # print "Type:MultiComment"
-            content = parseFragmentLead(content, fragment, tokens)
-
-            atBegin = not hasLeadingContent(tokens)
-            atEnd = re.search(r"^\s*\n", content) is not None
-
-            # print "Begin: %s, End: %s" % (atBegin, atEnd)
-
-            # fixing source content
-            if atBegin:
-                source = comment.outdent(source, parseColumn - 1)
-
-            source = comment.correct(source)
-
-            if atEnd and not atBegin:
-                connection = "after"
-            else:
-                connection = "before"
-
-            tokens.append({ "type" : "comment", "detail" : format, "multiline" : multiline, "connection" : connection, "source" : source, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn, "begin" : atBegin, "end" : atEnd })
-            parseLine += len(fragment.split("\n")) - 1
-
-        elif comment.R_INLINE_COMMENT.match(fragment):
-            # print "Type:SingleComment"
-            source = recoverEscape(fragment)
-            content = parseFragmentLead(content, fragment, tokens)
-
-            atBegin = hasLeadingContent(tokens)
-            atEnd = True
-
-            if atBegin:
-                connection = "after"
-            else:
-                connection = "before"
-
-            source = comment.correct(source)
-
-            tokens.append({ "type" : "comment", "detail" : "inline", "multiline" : False, "connection" : connection, "source" : source, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn, "begin" : atBegin, "end" : atEnd })
-
-        elif R_STRING_A.match(fragment):
-            # print "Type:StringA: %s" % fragment
-            content = parseFragmentLead(content, fragment, tokens)
-            tokens.append({ "type" : "string", "detail" : "singlequotes", "source" : recoverEscape(fragment)[1:-1].replace("\\\n", ""), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-        elif R_STRING_B.match(fragment):
-            # print "Type:StringB: %s" % fragment
-            content = parseFragmentLead(content, fragment, tokens)
-            tokens.append({ "type" : "string", "detail" : "doublequotes", "source" : recoverEscape(fragment)[1:-1].replace("\\\n", ""), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-        elif R_FLOAT.match(fragment):
-            # print "Type:Float: %s" % fragment
-            content = parseFragmentLead(content, fragment, tokens)
-            tokens.append({ "type" : "number", "detail" : "float", "source" : fragment, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-        elif R_OPERATORS.match(fragment):
-            # print "Type:Operator: %s" % fragment
-            content = parseFragmentLead(content, fragment, tokens)
-            tokens.append({ "type" : "token", "detail" : config.JSTOKENS[fragment], "source" : fragment, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-        else:
-            fragresult = R_REGEXP.search(fragment)
-
-            if fragresult:
-                # print "Type:RegExp: %s" % fragresult.group(0)
-
-                if R_REGEXP_A.match(fragment) or R_REGEXP_B.match(fragment) or R_REGEXP_C.match(fragment) or R_REGEXP_D.match(fragment):
-                    content = parseFragmentLead(content, fragresult.group(0), tokens)
-                    tokens.append({ "type" : "regexp", "detail" : "", "source" : recoverEscape(fragresult.group(0)), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-                else:
-                    print "Bad regular expression: %s" % fragresult.group(0)
-
-            else:
-                print "Type:None!"
-
-    tokens.extend(parsePart(recoverEscape(content)))
-    tokens.append({ "type" : "eof", "source" : "", "detail" : "", "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
-
-    return tokens
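-
-# Typical usage (sketch; assumes "var" is listed in config.JSPROTECTED):
-#   tokens = parseStream('var i = 1; // count', "myFile")
-# yields protected(var), name(i), token(=), number(1), token(;), then an
-# inline comment token with connection "after" (code precedes it on the
-# line), and finally an "eof" token.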
-
-
-
-def parseFile(fileName, uniqueId="", encoding="utf-8"):
-    return parseStream(filetool.read(fileName, encoding), uniqueId)
-
-
-
-
-def convertTokensToString(tokens):
-    tokenizedString = ""
-
-    for token in tokens:
-        tokenizedString += "%s\n" % token
-
-    return tokenizedString
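-
-# Each token is rendered via its dict repr, one per line, e.g.:
-#   {'type': 'name', 'detail': 'public', 'source': 'i', ...}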
-
-
-
-
-
-def main():
-    parser = optparse.OptionParser()
-
-    parser.add_option("-w", "--write", action="store_true", dest="write", default=False, help="Write the result to fileName + EXTENSION instead of stdout.")
-    parser.add_option("-e", "--extension", dest="extension", metavar="EXTENSION", help="The EXTENSION to use.", default=".tokenized")
-    parser.add_option("--encoding", dest="encoding", default="utf-8", metavar="ENCODING", help="Defines the encoding expected for input files.")
-
-    (options, args) = parser.parse_args()
-
-    if len(args) == 0:
-        print "Needs one or more arguments (files) to tokenize!"
-        sys.exit(1)
-
-    for fileName in args:
-        if options.write:
-            print "Tokenizing %s => %s%s" % (fileName, fileName, options.extension)
-        else:
-            print "Tokenizing %s => stdout" % fileName
-
-        tokenString = convertTokensToString(parseFile(fileName, "", options.encoding))
-
-        if options.write:
-            filetool.save(fileName + options.extension, tokenString, options.encoding)
-
-        else:
-            try:
-                print tokenString
-
-            except UnicodeEncodeError:
-                print " * Could not encode result to ascii. Use '-w' instead."
-                sys.exit(1)
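-
-# Example invocation (sketch):
-#   ./tokenizer.py -w foo.js
-# tokenizes foo.js and writes the token stream to foo.js.tokenized.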
-
-
-
-
-if __name__ == '__main__':
-    try:
-        main()
-
-    except KeyboardInterrupt:
-        print
-        print " * Keyboard Interrupt"
-        sys.exit(1)