diff options
author | Jelmer Vernooij <jelmer@samba.org> | 2012-01-04 00:31:27 +0100 |
---|---|---|
committer | Jelmer Vernooij <jelmer@samba.org> | 2012-01-04 22:34:20 +0100 |
commit | 4f4bce5301ffd8c12aed1b108affa1a75feefb67 (patch) | |
tree | 1607accd70a2c37a9b996f7b42ec926b014cfe5b /buildtools/wafadmin/Task.py | |
parent | 1b45f2aed86dda9fda6e6bcf1c9c7cbdc471c18d (diff) | |
download | samba-4f4bce5301ffd8c12aed1b108affa1a75feefb67.tar.gz samba-4f4bce5301ffd8c12aed1b108affa1a75feefb67.tar.bz2 samba-4f4bce5301ffd8c12aed1b108affa1a75feefb67.zip |
Include waf as an extracted source directory, rather than as a one-in-a-file script.
Diffstat (limited to 'buildtools/wafadmin/Task.py')
-rw-r--r-- | buildtools/wafadmin/Task.py | 1200 |
1 files changed, 1200 insertions, 0 deletions
diff --git a/buildtools/wafadmin/Task.py b/buildtools/wafadmin/Task.py new file mode 100644 index 0000000000..5cda2ec730 --- /dev/null +++ b/buildtools/wafadmin/Task.py @@ -0,0 +1,1200 @@ +#!/usr/bin/env python +# encoding: utf-8 +# Thomas Nagy, 2005-2008 (ita) + +""" +Running tasks in parallel is a simple problem, but in practice it is more complicated: +* dependencies discovered during the build (dynamic task creation) +* dependencies discovered after files are compiled +* the amount of tasks and dependencies (graph size) can be huge + +This is why the dependency management is split on three different levels: +1. groups of tasks that run all after another group of tasks +2. groups of tasks that can be run in parallel +3. tasks that can run in parallel, but with possible unknown ad-hoc dependencies + +The point #1 represents a strict sequential order between groups of tasks, for example a compiler is produced +and used to compile the rest, whereas #2 and #3 represent partial order constraints where #2 applies to the kind of task +and #3 applies to the task instances. + +#1 is held by the task manager: ordered list of TaskGroups (see bld.add_group) +#2 is held by the task groups and the task types: precedence after/before (topological sort), + and the constraints extracted from file extensions +#3 is held by the tasks individually (attribute run_after), + and the scheduler (Runner.py) use Task::runnable_status to reorder the tasks + +-- + +To try, use something like this in your code: +import Constants, Task +Task.algotype = Constants.MAXPARALLEL + +-- + +There are two concepts with the tasks (individual units of change): +* dependency (if 1 is recompiled, recompile 2) +* order (run 2 after 1) + +example 1: if t1 depends on t2 and t2 depends on t3 it is not necessary to make t1 depend on t3 (dependency is transitive) +example 2: if t1 depends on a node produced by t2, it is not immediately obvious that t1 must run after t2 (order is not obvious) + +The role of the Task Manager is to give the tasks in order (groups of task that may be run in parallel one after the other) + +""" + +import os, shutil, sys, re, random, datetime, tempfile, shlex +from Utils import md5 +import Build, Runner, Utils, Node, Logs, Options +from Logs import debug, warn, error +from Constants import * + +algotype = NORMAL +#algotype = JOBCONTROL +#algotype = MAXPARALLEL + +COMPILE_TEMPLATE_SHELL = ''' +def f(task): + env = task.env + wd = getattr(task, 'cwd', None) + p = env.get_flat + cmd = \'\'\' %s \'\'\' % s + return task.exec_command(cmd, cwd=wd) +''' + +COMPILE_TEMPLATE_NOSHELL = ''' +def f(task): + env = task.env + wd = getattr(task, 'cwd', None) + def to_list(xx): + if isinstance(xx, str): return [xx] + return xx + lst = [] + %s + lst = [x for x in lst if x] + return task.exec_command(lst, cwd=wd) +''' + + +""" +Enable different kind of dependency algorithms: +1 make groups: first compile all cpps and then compile all links (NORMAL) +2 parallelize all (each link task run after its dependencies) (MAXPARALLEL) +3 like 1 but provide additional constraints for the parallelization (MAXJOBS) + +In theory 1. will be faster than 2 for waf, but might be slower for builds +The scheme 2 will not allow for running tasks one by one so it can cause disk thrashing on huge builds +""" + +file_deps = Utils.nada +""" +Additional dependency pre-check may be added by replacing the function file_deps. +e.g. extract_outputs, extract_deps below. +""" + +class TaskManager(object): + """The manager is attached to the build object, it holds a list of TaskGroup""" + def __init__(self): + self.groups = [] + self.tasks_done = [] + self.current_group = 0 + self.groups_names = {} + + def group_name(self, g): + """name for the group g (utility)""" + if not isinstance(g, TaskGroup): + g = self.groups[g] + for x in self.groups_names: + if id(self.groups_names[x]) == id(g): + return x + return '' + + def group_idx(self, tg): + """group the task generator tg is in""" + se = id(tg) + for i in range(len(self.groups)): + g = self.groups[i] + for t in g.tasks_gen: + if id(t) == se: + return i + return None + + def get_next_set(self): + """return the next set of tasks to execute + the first parameter is the maximum amount of parallelization that may occur""" + ret = None + while not ret and self.current_group < len(self.groups): + ret = self.groups[self.current_group].get_next_set() + if ret: return ret + else: + self.groups[self.current_group].process_install() + self.current_group += 1 + return (None, None) + + def add_group(self, name=None, set=True): + #if self.groups and not self.groups[0].tasks: + # error('add_group: an empty group is already present') + g = TaskGroup() + + if name and name in self.groups_names: + error('add_group: name %s already present' % name) + self.groups_names[name] = g + self.groups.append(g) + if set: + self.current_group = len(self.groups) - 1 + + def set_group(self, idx): + if isinstance(idx, str): + g = self.groups_names[idx] + for x in xrange(len(self.groups)): + if id(g) == id(self.groups[x]): + self.current_group = x + else: + self.current_group = idx + + def add_task_gen(self, tgen): + if not self.groups: self.add_group() + self.groups[self.current_group].tasks_gen.append(tgen) + + def add_task(self, task): + if not self.groups: self.add_group() + self.groups[self.current_group].tasks.append(task) + + def total(self): + total = 0 + if not self.groups: return 0 + for group in self.groups: + total += len(group.tasks) + return total + + def add_finished(self, tsk): + self.tasks_done.append(tsk) + bld = tsk.generator.bld + if bld.is_install: + f = None + if 'install' in tsk.__dict__: + f = tsk.__dict__['install'] + # install=0 to prevent installation + if f: f(tsk) + else: + tsk.install() + +class TaskGroup(object): + "the compilation of one group does not begin until the previous group has finished (in the manager)" + def __init__(self): + self.tasks = [] # this list will be consumed + self.tasks_gen = [] + + self.cstr_groups = Utils.DefaultDict(list) # tasks having equivalent constraints + self.cstr_order = Utils.DefaultDict(set) # partial order between the cstr groups + self.temp_tasks = [] # tasks put on hold + self.ready = 0 + self.post_funs = [] + + def reset(self): + "clears the state of the object (put back the tasks into self.tasks)" + for x in self.cstr_groups: + self.tasks += self.cstr_groups[x] + self.tasks = self.temp_tasks + self.tasks + self.temp_tasks = [] + self.cstr_groups = Utils.DefaultDict(list) + self.cstr_order = Utils.DefaultDict(set) + self.ready = 0 + + def process_install(self): + for (f, k, kw) in self.post_funs: + f(*k, **kw) + + def prepare(self): + "prepare the scheduling" + self.ready = 1 + file_deps(self.tasks) + self.make_cstr_groups() + self.extract_constraints() + + def get_next_set(self): + "next list of tasks to execute using max job settings, returns (maxjobs, task_list)" + global algotype + if algotype == NORMAL: + tasks = self.tasks_in_parallel() + maxj = MAXJOBS + elif algotype == JOBCONTROL: + (maxj, tasks) = self.tasks_by_max_jobs() + elif algotype == MAXPARALLEL: + tasks = self.tasks_with_inner_constraints() + maxj = MAXJOBS + else: + raise Utils.WafError("unknown algorithm type %s" % (algotype)) + + if not tasks: return () + return (maxj, tasks) + + def make_cstr_groups(self): + "unite the tasks that have similar constraints" + self.cstr_groups = Utils.DefaultDict(list) + for x in self.tasks: + h = x.hash_constraints() + self.cstr_groups[h].append(x) + + def set_order(self, a, b): + self.cstr_order[a].add(b) + + def compare_exts(self, t1, t2): + "extension production" + x = "ext_in" + y = "ext_out" + in_ = t1.attr(x, ()) + out_ = t2.attr(y, ()) + for k in in_: + if k in out_: + return -1 + in_ = t2.attr(x, ()) + out_ = t1.attr(y, ()) + for k in in_: + if k in out_: + return 1 + return 0 + + def compare_partial(self, t1, t2): + "partial relations after/before" + m = "after" + n = "before" + name = t2.__class__.__name__ + if name in Utils.to_list(t1.attr(m, ())): return -1 + elif name in Utils.to_list(t1.attr(n, ())): return 1 + name = t1.__class__.__name__ + if name in Utils.to_list(t2.attr(m, ())): return 1 + elif name in Utils.to_list(t2.attr(n, ())): return -1 + return 0 + + def extract_constraints(self): + "extract the parallelization constraints from the tasks with different constraints" + keys = self.cstr_groups.keys() + max = len(keys) + # hopefully the length of this list is short + for i in xrange(max): + t1 = self.cstr_groups[keys[i]][0] + for j in xrange(i + 1, max): + t2 = self.cstr_groups[keys[j]][0] + + # add the constraints based on the comparisons + val = (self.compare_exts(t1, t2) + or self.compare_partial(t1, t2) + ) + if val > 0: + self.set_order(keys[i], keys[j]) + elif val < 0: + self.set_order(keys[j], keys[i]) + + def tasks_in_parallel(self): + "(NORMAL) next list of tasks that may be executed in parallel" + + if not self.ready: self.prepare() + + keys = self.cstr_groups.keys() + + unconnected = [] + remainder = [] + + for u in keys: + for k in self.cstr_order.values(): + if u in k: + remainder.append(u) + break + else: + unconnected.append(u) + + toreturn = [] + for y in unconnected: + toreturn.extend(self.cstr_groups[y]) + + # remove stuff only after + for y in unconnected: + try: self.cstr_order.__delitem__(y) + except KeyError: pass + self.cstr_groups.__delitem__(y) + + if not toreturn and remainder: + raise Utils.WafError("circular order constraint detected %r" % remainder) + + return toreturn + + def tasks_by_max_jobs(self): + "(JOBCONTROL) returns the tasks that can run in parallel with the max amount of jobs" + if not self.ready: self.prepare() + if not self.temp_tasks: self.temp_tasks = self.tasks_in_parallel() + if not self.temp_tasks: return (None, None) + + maxjobs = MAXJOBS + ret = [] + remaining = [] + for t in self.temp_tasks: + m = getattr(t, "maxjobs", getattr(self.__class__, "maxjobs", MAXJOBS)) + if m > maxjobs: + remaining.append(t) + elif m < maxjobs: + remaining += ret + ret = [t] + maxjobs = m + else: + ret.append(t) + self.temp_tasks = remaining + return (maxjobs, ret) + + def tasks_with_inner_constraints(self): + """(MAXPARALLEL) returns all tasks in this group, but add the constraints on each task instance + as an optimization, it might be desirable to discard the tasks which do not have to run""" + if not self.ready: self.prepare() + + if getattr(self, "done", None): return None + + for p in self.cstr_order: + for v in self.cstr_order[p]: + for m in self.cstr_groups[p]: + for n in self.cstr_groups[v]: + n.set_run_after(m) + self.cstr_order = Utils.DefaultDict(set) + self.cstr_groups = Utils.DefaultDict(list) + self.done = 1 + return self.tasks[:] # make a copy + +class store_task_type(type): + "store the task types that have a name ending in _task into a map (remember the existing task types)" + def __init__(cls, name, bases, dict): + super(store_task_type, cls).__init__(name, bases, dict) + name = cls.__name__ + + if name.endswith('_task'): + name = name.replace('_task', '') + if name != 'TaskBase': + TaskBase.classes[name] = cls + +class TaskBase(object): + """Base class for all Waf tasks + + The most important methods are (by usual order of call): + 1 runnable_status: ask the task if it should be run, skipped, or if we have to ask later + 2 __str__: string to display to the user + 3 run: execute the task + 4 post_run: after the task is run, update the cache about the task + + This class should be seen as an interface, it provides the very minimum necessary for the scheduler + so it does not do much. + + For illustration purposes, TaskBase instances try to execute self.fun (if provided) + """ + + __metaclass__ = store_task_type + + color = "GREEN" + maxjobs = MAXJOBS + classes = {} + stat = None + + def __init__(self, *k, **kw): + self.hasrun = NOT_RUN + + try: + self.generator = kw['generator'] + except KeyError: + self.generator = self + self.bld = Build.bld + + if kw.get('normal', 1): + self.generator.bld.task_manager.add_task(self) + + def __repr__(self): + "used for debugging" + return '\n\t{task: %s %s}' % (self.__class__.__name__, str(getattr(self, "fun", ""))) + + def __str__(self): + "string to display to the user" + if hasattr(self, 'fun'): + return 'executing: %s\n' % self.fun.__name__ + return self.__class__.__name__ + '\n' + + def exec_command(self, *k, **kw): + "use this for executing commands from tasks" + # TODO in waf 1.6, eliminate bld.exec_command, and move the cwd processing to here + if self.env['env']: + kw['env'] = self.env['env'] + return self.generator.bld.exec_command(*k, **kw) + + def runnable_status(self): + "RUN_ME SKIP_ME or ASK_LATER" + return RUN_ME + + def can_retrieve_cache(self): + return False + + def call_run(self): + if self.can_retrieve_cache(): + return 0 + return self.run() + + def run(self): + "called if the task must run" + if hasattr(self, 'fun'): + return self.fun(self) + return 0 + + def post_run(self): + "update the dependency tree (node stats)" + pass + + def display(self): + "print either the description (using __str__) or the progress bar or the ide output" + col1 = Logs.colors(self.color) + col2 = Logs.colors.NORMAL + + if Options.options.progress_bar == 1: + return self.generator.bld.progress_line(self.position[0], self.position[1], col1, col2) + + if Options.options.progress_bar == 2: + ela = Utils.get_elapsed_time(self.generator.bld.ini) + try: + ins = ','.join([n.name for n in self.inputs]) + except AttributeError: + ins = '' + try: + outs = ','.join([n.name for n in self.outputs]) + except AttributeError: + outs = '' + return '|Total %s|Current %s|Inputs %s|Outputs %s|Time %s|\n' % (self.position[1], self.position[0], ins, outs, ela) + + total = self.position[1] + n = len(str(total)) + fs = '[%%%dd/%%%dd] %%s%%s%%s' % (n, n) + return fs % (self.position[0], self.position[1], col1, str(self), col2) + + def attr(self, att, default=None): + "retrieve an attribute from the instance or from the class (microoptimization here)" + ret = getattr(self, att, self) + if ret is self: return getattr(self.__class__, att, default) + return ret + + def hash_constraints(self): + "identify a task type for all the constraints relevant for the scheduler: precedence, file production" + a = self.attr + sum = hash((self.__class__.__name__, + str(a('before', '')), + str(a('after', '')), + str(a('ext_in', '')), + str(a('ext_out', '')), + self.__class__.maxjobs)) + return sum + + def format_error(self): + "error message to display to the user (when a build fails)" + if getattr(self, "err_msg", None): + return self.err_msg + elif self.hasrun == CRASHED: + try: + return " -> task failed (err #%d): %r" % (self.err_code, self) + except AttributeError: + return " -> task failed: %r" % self + elif self.hasrun == MISSING: + return " -> missing files: %r" % self + else: + return '' + + def install(self): + """ + installation is performed by looking at the task attributes: + * install_path: installation path like "${PREFIX}/bin" + * filename: install the first node in the outputs as a file with a particular name, be certain to give os.sep + * chmod: permissions + """ + bld = self.generator.bld + d = self.attr('install') + + if self.attr('install_path'): + lst = [a.relpath_gen(bld.srcnode) for a in self.outputs] + perm = self.attr('chmod', O644) + if self.attr('src'): + # if src is given, install the sources too + lst += [a.relpath_gen(bld.srcnode) for a in self.inputs] + if self.attr('filename'): + dir = self.install_path.rstrip(os.sep) + os.sep + self.attr('filename') + bld.install_as(dir, lst[0], self.env, perm) + else: + bld.install_files(self.install_path, lst, self.env, perm) + +class Task(TaskBase): + """The parent class is quite limited, in this version: + * file system interaction: input and output nodes + * persistence: do not re-execute tasks that have already run + * caching: same files can be saved and retrieved from a cache directory + * dependencies: + implicit, like .c files depending on .h files + explicit, like the input nodes or the dep_nodes + environment variables, like the CXXFLAGS in self.env + """ + vars = [] + def __init__(self, env, **kw): + TaskBase.__init__(self, **kw) + self.env = env + + # inputs and outputs are nodes + # use setters when possible + self.inputs = [] + self.outputs = [] + + self.dep_nodes = [] + self.run_after = [] + + # Additionally, you may define the following + #self.dep_vars = 'PREFIX DATADIR' + + def __str__(self): + "string to display to the user" + env = self.env + src_str = ' '.join([a.nice_path(env) for a in self.inputs]) + tgt_str = ' '.join([a.nice_path(env) for a in self.outputs]) + if self.outputs: sep = ' -> ' + else: sep = '' + return '%s: %s%s%s\n' % (self.__class__.__name__.replace('_task', ''), src_str, sep, tgt_str) + + def __repr__(self): + return "".join(['\n\t{task: ', self.__class__.__name__, " ", ",".join([x.name for x in self.inputs]), " -> ", ",".join([x.name for x in self.outputs]), '}']) + + def unique_id(self): + "get a unique id: hash the node paths, the variant, the class, the function" + try: + return self.uid + except AttributeError: + "this is not a real hot zone, but we want to avoid surprizes here" + m = md5() + up = m.update + up(self.__class__.__name__) + up(self.env.variant()) + p = None + for x in self.inputs + self.outputs: + if p != x.parent.id: + p = x.parent.id + up(x.parent.abspath()) + up(x.name) + self.uid = m.digest() + return self.uid + + def set_inputs(self, inp): + if isinstance(inp, list): self.inputs += inp + else: self.inputs.append(inp) + + def set_outputs(self, out): + if isinstance(out, list): self.outputs += out + else: self.outputs.append(out) + + def set_run_after(self, task): + "set (scheduler) order on another task" + # TODO: handle list or object + assert isinstance(task, TaskBase) + self.run_after.append(task) + + def add_file_dependency(self, filename): + "TODO user-provided file dependencies" + node = self.generator.bld.path.find_resource(filename) + self.dep_nodes.append(node) + + def signature(self): + # compute the result one time, and suppose the scan_signature will give the good result + try: return self.cache_sig[0] + except AttributeError: pass + + self.m = md5() + + # explicit deps + exp_sig = self.sig_explicit_deps() + + # env vars + var_sig = self.sig_vars() + + # implicit deps + + imp_sig = SIG_NIL + if self.scan: + try: + imp_sig = self.sig_implicit_deps() + except ValueError: + return self.signature() + + # we now have the signature (first element) and the details (for debugging) + ret = self.m.digest() + self.cache_sig = (ret, exp_sig, imp_sig, var_sig) + return ret + + def runnable_status(self): + "SKIP_ME RUN_ME or ASK_LATER" + #return 0 # benchmarking + + if self.inputs and (not self.outputs): + if not getattr(self.__class__, 'quiet', None): + warn("invalid task (no inputs OR outputs): override in a Task subclass or set the attribute 'quiet' %r" % self) + + for t in self.run_after: + if not t.hasrun: + return ASK_LATER + + env = self.env + bld = self.generator.bld + + # first compute the signature + new_sig = self.signature() + + # compare the signature to a signature computed previously + key = self.unique_id() + try: + prev_sig = bld.task_sigs[key][0] + except KeyError: + debug("task: task %r must run as it was never run before or the task code changed", self) + return RUN_ME + + # compare the signatures of the outputs + for node in self.outputs: + variant = node.variant(env) + try: + if bld.node_sigs[variant][node.id] != new_sig: + return RUN_ME + except KeyError: + debug("task: task %r must run as the output nodes do not exist", self) + return RUN_ME + + # debug if asked to + if Logs.verbose: self.debug_why(bld.task_sigs[key]) + + if new_sig != prev_sig: + return RUN_ME + return SKIP_ME + + def post_run(self): + "called after a successful task run" + bld = self.generator.bld + env = self.env + sig = self.signature() + ssig = sig.encode('hex') + + variant = env.variant() + for node in self.outputs: + # check if the node exists .. + try: + os.stat(node.abspath(env)) + except OSError: + self.hasrun = MISSING + self.err_msg = '-> missing file: %r' % node.abspath(env) + raise Utils.WafError + + # important, store the signature for the next run + bld.node_sigs[variant][node.id] = sig + bld.task_sigs[self.unique_id()] = self.cache_sig + + # file caching, if possible + # try to avoid data corruption as much as possible + if not Options.cache_global or Options.options.nocache or not self.outputs: + return None + + if getattr(self, 'cached', None): + return None + + dname = os.path.join(Options.cache_global, ssig) + tmpdir = tempfile.mkdtemp(prefix=Options.cache_global + os.sep + 'waf') + + try: + shutil.rmtree(dname) + except: + pass + + try: + i = 0 + for node in self.outputs: + variant = node.variant(env) + dest = os.path.join(tmpdir, str(i) + node.name) + shutil.copy2(node.abspath(env), dest) + i += 1 + except (OSError, IOError): + try: + shutil.rmtree(tmpdir) + except: + pass + else: + try: + os.rename(tmpdir, dname) + except OSError: + try: + shutil.rmtree(tmpdir) + except: + pass + else: + try: + os.chmod(dname, O755) + except: + pass + + def can_retrieve_cache(self): + """ + Retrieve build nodes from the cache + update the file timestamps to help cleaning the least used entries from the cache + additionally, set an attribute 'cached' to avoid re-creating the same cache files + + suppose there are files in cache/dir1/file1 and cache/dir2/file2 + first, read the timestamp of dir1 + then try to copy the files + then look at the timestamp again, if it has changed, the data may have been corrupt (cache update by another process) + should an exception occur, ignore the data + """ + if not Options.cache_global or Options.options.nocache or not self.outputs: + return None + + env = self.env + sig = self.signature() + ssig = sig.encode('hex') + + # first try to access the cache folder for the task + dname = os.path.join(Options.cache_global, ssig) + try: + t1 = os.stat(dname).st_mtime + except OSError: + return None + + i = 0 + for node in self.outputs: + variant = node.variant(env) + + orig = os.path.join(dname, str(i) + node.name) + try: + shutil.copy2(orig, node.abspath(env)) + # mark the cache file as used recently (modified) + os.utime(orig, None) + except (OSError, IOError): + debug('task: failed retrieving file') + return None + i += 1 + + # is it the same folder? + try: + t2 = os.stat(dname).st_mtime + except OSError: + return None + + if t1 != t2: + return None + + for node in self.outputs: + self.generator.bld.node_sigs[variant][node.id] = sig + if Options.options.progress_bar < 1: + self.generator.bld.printout('restoring from cache %r\n' % node.bldpath(env)) + + self.cached = True + return 1 + + def debug_why(self, old_sigs): + "explains why a task is run" + + new_sigs = self.cache_sig + def v(x): + return x.encode('hex') + + debug("Task %r", self) + msgs = ['Task must run', '* Source file or manual dependency', '* Implicit dependency', '* Environment variable'] + tmp = 'task: -> %s: %s %s' + for x in xrange(len(msgs)): + if (new_sigs[x] != old_sigs[x]): + debug(tmp, msgs[x], v(old_sigs[x]), v(new_sigs[x])) + + def sig_explicit_deps(self): + bld = self.generator.bld + up = self.m.update + + # the inputs + for x in self.inputs + getattr(self, 'dep_nodes', []): + if not x.parent.id in bld.cache_scanned_folders: + bld.rescan(x.parent) + + variant = x.variant(self.env) + try: + up(bld.node_sigs[variant][x.id]) + except KeyError: + raise Utils.WafError('Missing node signature for %r (required by %r)' % (x, self)) + + # manual dependencies, they can slow down the builds + if bld.deps_man: + additional_deps = bld.deps_man + for x in self.inputs + self.outputs: + try: + d = additional_deps[x.id] + except KeyError: + continue + + for v in d: + if isinstance(v, Node.Node): + bld.rescan(v.parent) + variant = v.variant(self.env) + try: + v = bld.node_sigs[variant][v.id] + except KeyError: + raise Utils.WafError('Missing node signature for %r (required by %r)' % (v, self)) + elif hasattr(v, '__call__'): + v = v() # dependency is a function, call it + up(v) + + for x in self.dep_nodes: + v = bld.node_sigs[x.variant(self.env)][x.id] + up(v) + + return self.m.digest() + + def sig_vars(self): + bld = self.generator.bld + env = self.env + + # dependencies on the environment vars + act_sig = bld.hash_env_vars(env, self.__class__.vars) + self.m.update(act_sig) + + # additional variable dependencies, if provided + dep_vars = getattr(self, 'dep_vars', None) + if dep_vars: + self.m.update(bld.hash_env_vars(env, dep_vars)) + + return self.m.digest() + + #def scan(self, node): + # """this method returns a tuple containing: + # * a list of nodes corresponding to real files + # * a list of names for files not found in path_lst + # the input parameters may have more parameters that the ones used below + # """ + # return ((), ()) + scan = None + + # compute the signature, recompute it if there is no match in the cache + def sig_implicit_deps(self): + "the signature obtained may not be the one if the files have changed, we do it in two steps" + + bld = self.generator.bld + + # get the task signatures from previous runs + key = self.unique_id() + prev_sigs = bld.task_sigs.get(key, ()) + if prev_sigs: + try: + # for issue #379 + if prev_sigs[2] == self.compute_sig_implicit_deps(): + return prev_sigs[2] + except (KeyError, OSError): + pass + del bld.task_sigs[key] + raise ValueError('rescan') + + # no previous run or the signature of the dependencies has changed, rescan the dependencies + (nodes, names) = self.scan() + if Logs.verbose: + debug('deps: scanner for %s returned %s %s', str(self), str(nodes), str(names)) + + # store the dependencies in the cache + bld.node_deps[key] = nodes + bld.raw_deps[key] = names + + # recompute the signature and return it + try: + sig = self.compute_sig_implicit_deps() + except KeyError: + try: + nodes = [] + for k in bld.node_deps.get(self.unique_id(), []): + if k.id & 3 == 2: # Node.FILE: + if not k.id in bld.node_sigs[0]: + nodes.append(k) + else: + if not k.id in bld.node_sigs[self.env.variant()]: + nodes.append(k) + except: + nodes = '?' + raise Utils.WafError('Missing node signature for %r (for implicit dependencies %r)' % (nodes, self)) + + return sig + + def compute_sig_implicit_deps(self): + """it is intended for .cpp and inferred .h files + there is a single list (no tree traversal) + this is the hot spot so ... do not touch""" + upd = self.m.update + + bld = self.generator.bld + tstamp = bld.node_sigs + env = self.env + + for k in bld.node_deps.get(self.unique_id(), []): + # unlikely but necessary if it happens + if not k.parent.id in bld.cache_scanned_folders: + # if the parent folder is removed, an OSError may be thrown + bld.rescan(k.parent) + + # if the parent folder is removed, a KeyError will be thrown + if k.id & 3 == 2: # Node.FILE: + upd(tstamp[0][k.id]) + else: + upd(tstamp[env.variant()][k.id]) + + return self.m.digest() + +def funex(c): + dc = {} + exec(c, dc) + return dc['f'] + +reg_act = re.compile(r"(?P<backslash>\\)|(?P<dollar>\$\$)|(?P<subst>\$\{(?P<var>\w+)(?P<code>.*?)\})", re.M) +def compile_fun_shell(name, line): + """Compiles a string (once) into a function, eg: + simple_task_type('c++', '${CXX} -o ${TGT[0]} ${SRC} -I ${SRC[0].parent.bldpath()}') + + The env variables (CXX, ..) on the task must not hold dicts (order) + The reserved keywords TGT and SRC represent the task input and output nodes + + quick test: + bld(source='wscript', rule='echo "foo\\${SRC[0].name}\\bar"') + """ + + extr = [] + def repl(match): + g = match.group + if g('dollar'): return "$" + elif g('backslash'): return '\\\\' + elif g('subst'): extr.append((g('var'), g('code'))); return "%s" + return None + + line = reg_act.sub(repl, line) or line + + parm = [] + dvars = [] + app = parm.append + for (var, meth) in extr: + if var == 'SRC': + if meth: app('task.inputs%s' % meth) + else: app('" ".join([a.srcpath(env) for a in task.inputs])') + elif var == 'TGT': + if meth: app('task.outputs%s' % meth) + else: app('" ".join([a.bldpath(env) for a in task.outputs])') + else: + if not var in dvars: dvars.append(var) + app("p('%s')" % var) + if parm: parm = "%% (%s) " % (',\n\t\t'.join(parm)) + else: parm = '' + + c = COMPILE_TEMPLATE_SHELL % (line, parm) + + debug('action: %s', c) + return (funex(c), dvars) + +def compile_fun_noshell(name, line): + + extr = [] + def repl(match): + g = match.group + if g('dollar'): return "$" + elif g('subst'): extr.append((g('var'), g('code'))); return "<<|@|>>" + return None + + line2 = reg_act.sub(repl, line) + params = line2.split('<<|@|>>') + + buf = [] + dvars = [] + app = buf.append + for x in xrange(len(extr)): + params[x] = params[x].strip() + if params[x]: + app("lst.extend(%r)" % params[x].split()) + (var, meth) = extr[x] + if var == 'SRC': + if meth: app('lst.append(task.inputs%s)' % meth) + else: app("lst.extend([a.srcpath(env) for a in task.inputs])") + elif var == 'TGT': + if meth: app('lst.append(task.outputs%s)' % meth) + else: app("lst.extend([a.bldpath(env) for a in task.outputs])") + else: + app('lst.extend(to_list(env[%r]))' % var) + if not var in dvars: dvars.append(var) + + if params[-1]: + app("lst.extend(%r)" % shlex.split(params[-1])) + + fun = COMPILE_TEMPLATE_NOSHELL % "\n\t".join(buf) + debug('action: %s', fun) + return (funex(fun), dvars) + +def compile_fun(name, line, shell=None): + "commands can be launched by the shell or not" + if line.find('<') > 0 or line.find('>') > 0 or line.find('&&') > 0: + shell = True + #else: + # shell = False + + if shell is None: + if sys.platform == 'win32': + shell = False + else: + shell = True + + if shell: + return compile_fun_shell(name, line) + else: + return compile_fun_noshell(name, line) + +def simple_task_type(name, line, color='GREEN', vars=[], ext_in=[], ext_out=[], before=[], after=[], shell=None): + """return a new Task subclass with the function run compiled from the line given""" + (fun, dvars) = compile_fun(name, line, shell) + fun.code = line + return task_type_from_func(name, fun, vars or dvars, color, ext_in, ext_out, before, after) + +def task_type_from_func(name, func, vars=[], color='GREEN', ext_in=[], ext_out=[], before=[], after=[]): + """return a new Task subclass with the function run compiled from the line given""" + params = { + 'run': func, + 'vars': vars, + 'color': color, + 'name': name, + 'ext_in': Utils.to_list(ext_in), + 'ext_out': Utils.to_list(ext_out), + 'before': Utils.to_list(before), + 'after': Utils.to_list(after), + } + + cls = type(Task)(name, (Task,), params) + TaskBase.classes[name] = cls + return cls + +def always_run(cls): + """Set all task instances of this class to be executed whenever a build is started + The task signature is calculated, but the result of the comparation between + task signatures is bypassed + """ + old = cls.runnable_status + def always(self): + ret = old(self) + if ret == SKIP_ME: + return RUN_ME + return ret + cls.runnable_status = always + +def update_outputs(cls): + """When a command is always run, it is possible that the output only change + sometimes. By default the build node have as a hash the signature of the task + which may not change. With this, the output nodes (produced) are hashed, + and the hashes are set to the build nodes + + This may avoid unnecessary recompilations, but it uses more resources + (hashing the output files) so it is not used by default + """ + old_post_run = cls.post_run + def post_run(self): + old_post_run(self) + bld = self.generator.bld + for output in self.outputs: + bld.node_sigs[self.env.variant()][output.id] = Utils.h_file(output.abspath(self.env)) + bld.task_sigs[output.id] = self.unique_id() + cls.post_run = post_run + + old_runnable_status = cls.runnable_status + def runnable_status(self): + status = old_runnable_status(self) + if status != RUN_ME: + return status + + uid = self.unique_id() + try: + bld = self.outputs[0].__class__.bld + new_sig = self.signature() + prev_sig = bld.task_sigs[uid][0] + if prev_sig == new_sig: + for x in self.outputs: + if not x.id in bld.node_sigs[self.env.variant()]: + return RUN_ME + if bld.task_sigs[x.id] != uid: # ensure the outputs are associated with *this* task + return RUN_ME + return SKIP_ME + except KeyError: + pass + except IndexError: + pass + return RUN_ME + cls.runnable_status = runnable_status + +def extract_outputs(tasks): + """file_deps: Infer additional dependencies from task input and output nodes + """ + v = {} + for x in tasks: + try: + (ins, outs) = v[x.env.variant()] + except KeyError: + ins = {} + outs = {} + v[x.env.variant()] = (ins, outs) + + for a in getattr(x, 'inputs', []): + try: ins[a.id].append(x) + except KeyError: ins[a.id] = [x] + for a in getattr(x, 'outputs', []): + try: outs[a.id].append(x) + except KeyError: outs[a.id] = [x] + + for (ins, outs) in v.values(): + links = set(ins.iterkeys()).intersection(outs.iterkeys()) + for k in links: + for a in ins[k]: + for b in outs[k]: + a.set_run_after(b) + +def extract_deps(tasks): + """file_deps: Infer additional dependencies from task input and output nodes and from implicit dependencies + returned by the scanners - that will only work if all tasks are created + + this is aimed at people who have pathological builds and who do not care enough + to implement the build dependencies properly + + with two loops over the list of tasks, do not expect this to be really fast + """ + + # first reuse the function above + extract_outputs(tasks) + + # map the output nodes to the tasks producing them + out_to_task = {} + for x in tasks: + v = x.env.variant() + try: + lst = x.outputs + except AttributeError: + pass + else: + for node in lst: + out_to_task[(v, node.id)] = x + + # map the dependencies found to the tasks compiled + dep_to_task = {} + for x in tasks: + try: + x.signature() + except: # this is on purpose + pass + + v = x.env.variant() + key = x.unique_id() + for k in x.generator.bld.node_deps.get(x.unique_id(), []): + try: dep_to_task[(v, k.id)].append(x) + except KeyError: dep_to_task[(v, k.id)] = [x] + + # now get the intersection + deps = set(dep_to_task.keys()).intersection(set(out_to_task.keys())) + + # and add the dependencies from task to task + for idx in deps: + for k in dep_to_task[idx]: + k.set_run_after(out_to_task[idx]) + + # cleanup, remove the signatures + for x in tasks: + try: + delattr(x, 'cache_sig') + except AttributeError: + pass + |