Diffstat (limited to 'webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py')
-rwxr-xr-x | webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py b/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py
new file mode 100755
index 0000000000..2f8e40436b
--- /dev/null
+++ b/webapps/qooxdoo-0.6.3-sdk/frontend/framework/tool/modules/tokenizer.py
@@ -0,0 +1,349 @@
+#!/usr/bin/env python
+
+import sys, string, re, optparse
+import config, filetool, comment
+
+R_WHITESPACE = re.compile(r"(\s+)")
+R_NONWHITESPACE = re.compile("\S+")
+R_NUMBER = re.compile("^[0-9]+")
+R_NEWLINE = re.compile(r"(\n)")
+
+# Ideas from: http://www.regular-expressions.info/examplesprogrammer.html
+# Multicomment RegExp inspired by: http://ostermiller.org/findcomment.html
+
+# builds regexp strings
+S_STRING_A = "'[^'\\\n]*(\\.|\n[^'\\\n]*)*'"
+S_STRING_B = '"[^"\\\n]*(\\.|\n[^"\\\n]*)*"'
+
+S_FLOAT = "([0-9]+\.[0-9]+)"
+
+S_OPERATORS_2 = r"(==)|(!=)|(\+\+)|(--)|(-=)|(\+=)|(\*=)|(/=)|(%=)|(&&)|(\|\|)|(\>=)|(\<=)|(>>)|(<<)|(\^\|)|(\|=)|(\^=)|(&=)|(::)|(\.\.)"
+S_OPERATORS_3 = r"(===)|(!==)|(\<\<=)|(\>\>=)|(\>\>\>)"
+S_OPERATORS_4 = r"(\>\>\>=)"
+S_OPERATORS = "(" + S_OPERATORS_4 + "|" + S_OPERATORS_3 + "|" + S_OPERATORS_2 + ")"
+
+S_REGEXP = "(\/[^\t\n\r\f\v\/]+?\/[mgi]*)"
+S_REGEXP_A = "\.(match|search|split)\s*\(\s*\(*\s*" + S_REGEXP + "\s*\)*\s*\)"
+S_REGEXP_B = "\.(replace)\s*\(\s*\(*\s*" + S_REGEXP + "\s*\)*\s*?,?"
+S_REGEXP_C = "\s*\(*\s*" + S_REGEXP + "\)*\.(test|exec)\s*\(\s*"
+S_REGEXP_D = "(:|=|\?)\s*\(*\s*" + S_REGEXP + "\s*\)*"
+S_REGEXP_ALL = S_REGEXP_A + "|" + S_REGEXP_B + "|" + S_REGEXP_C + "|" + S_REGEXP_D
+
+S_ALL = "(" + comment.S_BLOCK_COMMENT + "|" + comment.S_INLINE_COMMENT + "|" + S_STRING_A + "|" + S_STRING_B + "|" + S_REGEXP_ALL + "|" + S_FLOAT + "|" + S_OPERATORS + ")"
+
+# compile regexp strings
+R_STRING_A = re.compile("^" + S_STRING_A + "$")
+R_STRING_B = re.compile("^" + S_STRING_B + "$")
+R_FLOAT = re.compile("^" + S_FLOAT + "$")
+R_OPERATORS = re.compile(S_OPERATORS)
+R_REGEXP = re.compile(S_REGEXP)
+R_REGEXP_A = re.compile(S_REGEXP_A)
+R_REGEXP_B = re.compile(S_REGEXP_B)
+R_REGEXP_C = re.compile(S_REGEXP_C)
+R_REGEXP_D = re.compile(S_REGEXP_D)
+R_ALL = re.compile(S_ALL)
+
+
+
+
+parseLine = 1
+parseColumn = 1
+parseUniqueId = ""
+
+
+
+def protectEscape(s):
+    return s.replace("\\\\", "__$ESCAPE0$__").replace("\\\"", "__$ESCAPE1$__").replace("\\\'", "__$ESCAPE2__").replace("\/", "__$ESCAPE3__").replace("\!", "__$ESCAPE4__")
+
+
+
+def recoverEscape(s):
+    return s.replace("__$ESCAPE0$__", "\\\\").replace("__$ESCAPE1$__", "\\\"").replace("__$ESCAPE2__", "\\'").replace("__$ESCAPE3__", "\/").replace("__$ESCAPE4__", "\!")
+
+
+
+def parseElement(element):
+    global parseUniqueId
+    global parseLine
+    global parseColumn
+
+    if config.JSPROTECTED.has_key(element):
+        # print "PROTECTED: %s" % PROTECTED[content]
+        obj = { "type" : "protected", "detail" : config.JSPROTECTED[element], "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
+
+    elif element in config.JSBUILTIN:
+        # print "BUILTIN: %s" % content
+        obj = { "type" : "builtin", "detail" : "", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
+
+    elif R_NUMBER.search(element):
+        # print "NUMBER: %s" % content
+        obj = { "type" : "number", "detail" : "int", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
+
+    elif element.startswith("_"):
+        # print "PRIVATE NAME: %s" % content
+        obj = { "type" : "name", "detail" : "private", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
+
+    elif len(element) > 0:
+        # print "PUBLIC NAME: %s" % content
+        obj = { "type" : "name", "detail" : "public", "source" : element, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId }
+
+    parseColumn += len(element)
+
+    return obj
+
+
+def parsePart(part):
+    global parseUniqueId
+    global parseLine
+    global parseColumn
+
+    tokens = []
+    element = ""
+
+    for line in R_NEWLINE.split(part):
+        if line == "\n":
+            tokens.append({ "type" : "eol", "source" : "", "detail" : "", "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId })
+            parseColumn = 1
+            parseLine += 1
+
+        else:
+            for item in R_WHITESPACE.split(line):
+                if item == "":
+                    continue
+
+                if not R_NONWHITESPACE.search(item):
+                    parseColumn += len(item)
+                    continue
+
+                # print "ITEM: '%s'" % item
+
+                for char in item:
+                    # work on single character tokens, otherwise concat to a bigger element
+                    if config.JSTOKENS.has_key(char):
+                        # convert existing element
+                        if element != "":
+                            if R_NONWHITESPACE.search(element):
+                                tokens.append(parseElement(element))
+
+                            element = ""
+
+                        # add character to token list
+                        tokens.append({ "type" : "token", "detail" : config.JSTOKENS[char], "source" : char, "line" : parseLine, "column" : parseColumn, "id" : parseUniqueId })
+                        parseColumn += 1
+
+                    else:
+                        element += char
+
+                # convert remaining stuff to tokens
+                if element != "":
+                    if R_NONWHITESPACE.search(element):
+                        tokens.append(parseElement(element))
+
+                    element = ""
+
+    return tokens
+
+
+
+def parseFragmentLead(content, fragment, tokens):
+    pos = content.find(fragment)
+
+    if pos > 0:
+        tokens.extend(parsePart(recoverEscape(content[0:pos])))
+
+    return content[pos+len(fragment):]
+
+
+
+def hasLeadingContent(tokens):
+    pos = len(tokens) - 1
+    while pos > 0:
+        if tokens[pos]["type"] == "eol":
+            break
+
+        else:
+            return True
+
+    return False
+
+
+
+
+
+def parseStream(content, uniqueId=""):
+    # make global variables available
+    global parseLine
+    global parseColumn
+    global parseUniqueId
+
+    # reset global stuff
+    parseColumn = 1
+    parseLine = 1
+    parseUniqueId = uniqueId
+
+    # prepare storage
+    tokens = []
+    content = protectEscape(content)
+
+    # print " * searching for patterns..."
+    all = R_ALL.findall(content)
+
+    # print " * structuring..."
+    for item in all:
+        fragment = item[0]
+
+        # print "Found: '%s'" % fragment
+
+        if comment.R_BLOCK_COMMENT.match(fragment):
+            source = recoverEscape(fragment)
+            format = comment.getFormat(source)
+            multiline = comment.isMultiLine(source)
+
+            # print "Type:MultiComment"
+            content = parseFragmentLead(content, fragment, tokens)
+
+            atBegin = not hasLeadingContent(tokens)
+            if re.compile("^\s*\n").search(content):
+                atEnd = True
+            else:
+                atEnd = False
+
+            # print "Begin: %s, End: %s" % (atBegin, atEnd)
+
+            # Fixing source content
+            if atBegin:
+                source = comment.outdent(source, parseColumn - 1)
+
+            source = comment.correct(source)
+
+            connection = "before"
+
+            if atEnd and not atBegin:
+                connection = "after"
+            else:
+                connection = "before"
+
+            tokens.append({ "type" : "comment", "detail" : format, "multiline" : multiline, "connection" : connection, "source" : source, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn, "begin" : atBegin, "end" : atEnd })
+            parseLine += len(fragment.split("\n")) - 1
+
+        elif comment.R_INLINE_COMMENT.match(fragment):
+            # print "Type:SingleComment"
+            source = recoverEscape(fragment)
+            content = parseFragmentLead(content, fragment, tokens)
+
+            atBegin = hasLeadingContent(tokens)
+            atEnd = True
+
+            if atBegin:
+                connection = "after"
+            else:
+                connection = "before"
+
+            source = comment.correct(source)
+
+            tokens.append({ "type" : "comment", "detail" : "inline", "multiline" : False, "connection" : connection, "source" : source, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn, "begin" : atBegin, "end" : atEnd })
+
+        elif R_STRING_A.match(fragment):
+            # print "Type:StringA: %s" % fragment
+            content = parseFragmentLead(content, fragment, tokens)
+            tokens.append({ "type" : "string", "detail" : "singlequotes", "source" : recoverEscape(fragment)[1:-1].replace("\\\n",""), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+        elif R_STRING_B.match(fragment):
+            # print "Type:StringB: %s" % fragment
+            content = parseFragmentLead(content, fragment, tokens)
+            tokens.append({ "type" : "string", "detail" : "doublequotes", "source" : recoverEscape(fragment)[1:-1].replace("\\\n",""), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+        elif R_FLOAT.match(fragment):
+            # print "Type:Float: %s" % fragment
+            content = parseFragmentLead(content, fragment, tokens)
+            tokens.append({ "type" : "number", "detail" : "float", "source" : fragment, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+        elif R_OPERATORS.match(fragment):
+            # print "Type:Operator: %s" % fragment
+            content = parseFragmentLead(content, fragment, tokens)
+            tokens.append({ "type" : "token", "detail" : config.JSTOKENS[fragment], "source" : fragment, "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+        else:
+            fragresult = R_REGEXP.search(fragment)
+
+            if fragresult:
+                # print "Type:RegExp: %s" % fragresult.group(0)
+
+                if R_REGEXP_A.match(fragment) or R_REGEXP_B.match(fragment) or R_REGEXP_C.match(fragment) or R_REGEXP_D.match(fragment):
+                    content = parseFragmentLead(content, fragresult.group(0), tokens)
+                    tokens.append({ "type" : "regexp", "detail" : "", "source" : recoverEscape(fragresult.group(0)), "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+                else:
+                    print "Bad regular expression: %s" % fragresult.group(0)
+
+            else:
+                print "Type:None!"
+
+    tokens.extend(parsePart(recoverEscape(content)))
+    tokens.append({ "type" : "eof", "source" : "", "detail" : "", "id" : parseUniqueId, "line" : parseLine, "column" : parseColumn })
+
+    return tokens
+
+
+
+def parseFile(fileName, uniqueId="", encoding="utf-8"):
+    return parseStream(filetool.read(fileName, encoding), uniqueId)
+
+
+
+
+def convertTokensToString(tokens):
+    tokenizedString = ""
+
+    for token in tokens:
+        tokenizedString += "%s%s" % (token, "\n")
+
+    return tokenizedString
+
+
+
+
+
+def main():
+    parser = optparse.OptionParser()
+
+    parser.add_option("-w", "--write", action="store_true", dest="write", default=False, help="Writes file to incoming fileName + EXTENSION.")
+    parser.add_option("-e", "--extension", dest="extension", metavar="EXTENSION", help="The EXTENSION to use", default=".tokenized")
+    parser.add_option("--encoding", dest="encoding", default="utf-8", metavar="ENCODING", help="Defines the encoding expected for input files.")
+
+    (options, args) = parser.parse_args()
+
+    if len(args) == 0:
+        print "Needs one or more arguments (files) to tokenize!"
+        sys.exit(1)
+
+    for fileName in args:
+        if options.write:
+            print "Compiling %s => %s%s" % (fileName, fileName, options.extension)
+        else:
+            print "Compiling %s => stdout" % fileName
+
+        tokenString = convertTokensToString(parseFile(fileName, "", options.encoding))
+
+        if options.write:
+            filetool.save(fileName + options.extension, tokenString, options.encoding)
+
+        else:
+            try:
+                print tokenString
+
+            except UnicodeEncodeError:
+                print " * Could not encode result to ascii. Use '-w' instead."
+                sys.exit(1)
+
+
+
+
+if __name__ == '__main__':
+    try:
+        main()
+
+    except KeyboardInterrupt:
+        print
+        print " * Keyboard Interrupt"
+        sys.exit(1)
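Note: the sketch below is not part of the commit; it is a minimal usage example based only on the functions added above, assuming a Python 2 interpreter and that the sibling modules config, filetool and comment from the same tool/modules directory are importable. The input file name source/Application.js is hypothetical.

    # Command-line use (options defined in main() above):
    #   python tokenizer.py --encoding utf-8 -w source/Application.js
    # with -w, the token stream is written to source/Application.js.tokenized
    # (input name plus the default ".tokenized" extension).

    import tokenizer

    # parseFile(fileName, uniqueId="", encoding="utf-8") returns a flat list of
    # token dicts carrying "type", "detail", "source", "line", "column" and "id".
    tokens = tokenizer.parseFile("source/Application.js", "Application", "utf-8")

    # e.g. list all private identifiers found by the tokenizer
    for token in tokens:
        if token["type"] == "name" and token["detail"] == "private":
            print "private name %s at line %s" % (token["source"], token["line"])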