#!/usr/bin/python
# 
#  work out the minimal schema for a set of objectclasses 
#

import optparse

import os, sys

# Find right directory when running from source tree
sys.path.insert(0, "bin/python")

import samba
from samba import getopt as options, Ldb
from ldb import SCOPE_SUBTREE, SCOPE_BASE, LdbError
import sys

# Command line: minschema <URL> <classfile>, plus the standard Samba,
# credentials and version option groups.
parser = optparse.OptionParser("minschema <URL> <classfile>")
sambaopts = options.SambaOptions(parser)
credopts = options.CredentialsOptions(parser)
parser.add_option_group(sambaopts)
parser.add_option_group(credopts)
parser.add_option_group(options.VersionOptions(parser))
parser.add_option("--verbose", help="Be verbose", action="store_true")
parser.add_option("--dump-classes", action="store_true")
parser.add_option("--dump-attributes", action="store_true")
parser.add_option("--dump-subschema", action="store_true")
parser.add_option("--dump-subschema-auto", action="store_true")

opts, args = parser.parse_args()

# --dump-subschema-auto implies --dump-subschema
if opts.dump_subschema_auto:
    opts.dump_subschema = True

# When no --dump-* option was given at all, dump everything.
opts.dump_all = not (opts.dump_classes or opts.dump_attributes
                     or opts.dump_subschema)
if opts.dump_all:
    opts.dump_classes = opts.dump_attributes = True
    opts.dump_subschema = opts.dump_subschema_auto = True

# Exactly two positional arguments are required.
if len(args) != 2:
    parser.print_usage()
    sys.exit(1)

url, classfile = args

# Connect to the directory named by <URL> with the supplied credentials.
lp_ctx = sambaopts.get_loadparm()
creds = credopts.get_credentials(lp_ctx)
ldb = Ldb(url, credentials=creds)

# Both collections map an lDAPDisplayName to its Objectclass / Attribute
# object.  The rest of the script indexes them by name and tests membership
# by name, so they must be dictionaries rather than lists.
objectclasses = {}
attributes = {}

# names of the objectclasses that have already been expanded
objectclasses_expanded = set()

# Attributes requested from the schema for every objectclass (and the order
# in which they are written out).
class_attrs = [
    "objectClass",
    "subClassOf",
    "governsID",
    "possSuperiors",
    "possibleInferiors",
    "mayContain",
    "mustContain",
    "auxiliaryClass",
    "rDNAttID",
    "showInAdvancedViewOnly",
    "adminDisplayName",
    "adminDescription",
    "objectClassCategory",
    "lDAPDisplayName",
    "schemaIDGUID",
    "systemOnly",
    "systemPossSuperiors",
    "systemMayContain",
    "systemMustContain",
    "systemAuxiliaryClass",
    "defaultSecurityDescriptor",
    "systemFlags",
    "defaultHidingValue",
    "objectCategory",
    "defaultObjectCategory",

    # these attributes are not used by w2k3
    "schemaFlagsEx",
    "msDs-IntId",
    "msDs-Schema-Extensions",
    "classDisplayName",
    "isDefunct",
]

# Attributes requested from the schema for every attribute definition (and
# the order in which they are written out).
attrib_attrs = [
    "objectClass",
    "attributeID",
    "attributeSyntax",
    "isSingleValued",
    "rangeLower",
    "rangeUpper",
    "mAPIID",
    "linkID",
    "showInAdvancedViewOnly",
    "adminDisplayName",
    "oMObjectClass",
    "adminDescription",
    "oMSyntax",
    "searchFlags",
    "extendedCharsAllowed",
    "lDAPDisplayName",
    "schemaIDGUID",
    "attributeSecurityGUID",
    "systemOnly",
    "systemFlags",
    "isMemberOfPartialAttributeSet",
    "objectCategory",

    # these attributes are not used by w2k3
    "schemaFlagsEx",
    "msDs-IntId",
    "msDs-Schema-Extensions",
    "classDisplayName",
    "isEphemeral",
    "isDefunct",
]

#
#  notes:
#
#  objectClassCategory 
#      1: structural
#      2: abstract
#      3: auxiliary

def get_object_cn(ldb, name):
    """Return the "cn" of the schema object whose lDAPDisplayName is `name`.

    Searches the subtree below rootDse["schemaNamingContext"] and asserts
    that exactly one entry matches.
    """
    # Ldb.search() takes (base, scope, expression, attrs) in that order, so
    # pass everything by keyword to be sure each value reaches the right
    # parameter (the same style the other schema lookups in this file use).
    res = ldb.search(
        expression="(ldapDisplayName=%s)" % name,
        base=rootDse["schemaNamingContext"],
        scope=SCOPE_SUBTREE,
        attrs=["cn"])
    assert len(res) == 1

    return res[0]["cn"]

class Objectclass(dict):
    """An objectclass of the schema.

    The instance is a dictionary of the schema properties fetched for the
    class (the rest of the script stores them with ``o[a] = value`` and tests
    them with key lookups), while `name` and `cn` are plain attributes.
    """

    def __init__(self, ldb, name):
        """create an objectclass object for the class named `name`"""
        dict.__init__(self)
        self.name = name
        self.cn = get_object_cn(ldb, name)


class Attribute(dict):
    """An attribute of the schema.

    The instance is a dictionary of the schema properties fetched for the
    attribute (stored with ``o[a] = value``), while `name`, `cn` and
    `autocreate` are plain attributes.
    """

    # Default for the flag read by the verbose report; it is only switched
    # on for attributes observed to be created automatically.
    autocreate = False

    def __init__(self, ldb, name):
        """create an attribute object for the attribute named `name`"""
        dict.__init__(self)
        self.name = name
        self.cn = get_object_cn(ldb, name)


# attributeSyntax OID as stored in the schema -> syntax OID to publish
syntaxmap = {
    '2.5.5.1': '1.3.6.1.4.1.1466.115.121.1.12',
    '2.5.5.2': '1.3.6.1.4.1.1466.115.121.1.38',
    '2.5.5.3': '1.2.840.113556.1.4.1362',
    '2.5.5.4': '1.2.840.113556.1.4.905',
    '2.5.5.5': '1.3.6.1.4.1.1466.115.121.1.26',
    '2.5.5.6': '1.3.6.1.4.1.1466.115.121.1.36',
    '2.5.5.7': '1.2.840.113556.1.4.903',
    '2.5.5.8': '1.3.6.1.4.1.1466.115.121.1.7',
    '2.5.5.9': '1.3.6.1.4.1.1466.115.121.1.27',
    '2.5.5.10': '1.3.6.1.4.1.1466.115.121.1.40',
    '2.5.5.11': '1.3.6.1.4.1.1466.115.121.1.24',
    '2.5.5.12': '1.3.6.1.4.1.1466.115.121.1.15',
    '2.5.5.13': '1.3.6.1.4.1.1466.115.121.1.43',
    '2.5.5.14': '1.2.840.113556.1.4.904',
    '2.5.5.15': '1.2.840.113556.1.4.907',
    '2.5.5.16': '1.2.840.113556.1.4.906',
    '2.5.5.17': '1.3.6.1.4.1.1466.115.121.1.40',
}


def map_attribute_syntax(s):
    """map some attribute syntaxes from some apparently MS specific
    syntaxes to the standard syntaxes

    Returns the entry of `syntaxmap` for `s`, or `s` itself when the
    syntax has no mapping.
    """
    # dict.get() replaces the deprecated has_key() test-then-index pair
    return syntaxmap.get(s, s)


def fix_dn(dn):
    """Return `dn` with the schema naming context replaced by the
    ${SCHEMADN} placeholder."""
    schema_dn = rootDse["schemaNamingContext"]
    return dn.replace(schema_dn, "${SCHEMADN}")


def write_ldif_one(o, attrs):
    """dump an object as ldif

    `o` is a mapping of attribute name to value(s); `attrs` lists the
    attribute names to write, in output order.  Names missing from `o` are
    skipped.  The record is written to stdout and ends with one blank line.
    """
    # Objectclass/Attribute keep the CN as the attribute `cn`; plain
    # mappings are expected to carry it under the "cn" key instead.
    cn = getattr(o, "cn", None)
    if cn is None:
        cn = o["cn"]

    # print already ends the line.  An extra "\n" here would put a blank
    # line after every LDIF line, and a blank line ends the record.
    print("dn: CN=%s,${SCHEMADN}" % cn)
    for a in attrs:
        if a not in o:
            continue
        # special case for oMObjectClass, which is a binary object
        if a == "oMObjectClass":
            print("%s:: %s" % (a, o[a]))
            continue
        v = o[a]
        if isinstance(v, str):
            v = [v]
        for j in v:
            print("%s: %s" % (a, fix_dn(j)))
    # the single blank line that separates LDIF records
    print("")

def write_ldif(o, attrs):
    """dump a collection of objects as ldif

    `o` may be a sequence of objects or a dictionary whose values are the
    objects; `attrs` is handed to write_ldif_one() for each of them.
    """
    # Iterating a dictionary yields its keys (the names), not the objects.
    records = o.values() if isinstance(o, dict) else o
    for record in records:
        write_ldif_one(record, attrs)


def create_testdn(exampleDN):
    """Build a test DN from an example DN.

    The first comma-separated component is replaced by "CN=TestDN" and the
    rest is kept, so the test DN sits where the example did and obeys the
    same structural rules.
    """
    components = exampleDN.split(",")
    return ",".join(["CN=TestDN"] + components[1:])


def find_objectclass_properties(ldb, o):
    """Fetch the schema properties of objectclass `o` and store them in it.

    Looks the class up by lDAPDisplayName under the schema naming context,
    asks for `class_attrs`, asserts a single match and copies every
    returned element into `o` under the same key.
    """
    query = "(ldapDisplayName=%s)" % o.name
    matches = ldb.search(
        expression=query,
        base=rootDse["schemaNamingContext"],
        scope=SCOPE_SUBTREE,
        attrs=class_attrs)
    assert len(matches) == 1
    record = matches[0]
    for key in record:
        o[key] = record[key]

def find_attribute_properties(ldb, o):
    """Fetch the schema properties of attribute `o` and store them in it.

    Looks the attribute up by lDAPDisplayName under the schema naming
    context, asks for `attrib_attrs`, asserts a single match and copies
    every returned element into `o` under the same key.
    """
    query = "(ldapDisplayName=%s)" % o.name
    matches = ldb.search(
        expression=query,
        base=rootDse["schemaNamingContext"],
        scope=SCOPE_SUBTREE,
        attrs=attrib_attrs)
    assert len(matches) == 1
    record = matches[0]
    for key in record:
        value = record[key]
        if key == "oMObjectClass":
            # oMObjectClass is a binary object, so it is stored encoded.
            # NOTE(review): this relies on the connection object providing
            # encode() -- confirm against the ldb bindings in use.
            value = ldb.encode(value)
        o[key] = value


def find_objectclass_auto(ldb, o):
    """find the auto-created properties of an objectclass. Only works for
    classes that can be created using just a DN and the objectclass

    A throw-away entry of class `o` is added next to the class's example
    DN, read back and deleted again.  Every attribute found on it that is
    already known in `attributes` is flagged with autocreate = True.
    Classes without an example DN are skipped.
    """
    # exampleDN is set as a plain attribute on the object, not as a key
    example_dn = getattr(o, "exampleDN", None)
    if example_dn is None:
        return
    testdn = create_testdn(example_dn)

    print("testdn is '%s'\n" % testdn)

    # LDIF text of the test entry, kept for the error report below
    ldif = "dn: " + testdn
    ldif += "\nobjectClass: " + o.name
    try:
        # add() takes a message or dictionary; an LDIF string is not accepted
        ldb.add({"dn": testdn, "objectClass": o.name})
    except LdbError as e:
        print("error adding %s: %s\n" % (o.name, e))
        print("%s\n" % ldif)
        return

    try:
        # SCOPE_BASE is the module-level constant; inside this function
        # `ldb` names the connection, not the ldb module.
        res = ldb.search(base=testdn, scope=SCOPE_BASE)
    finally:
        # never leave the test entry behind, even if the read-back fails
        ldb.delete(testdn)

    if len(res) == 0:
        return
    for a in res[0]:
        # the entry may carry names that were never collected (e.g. "dn")
        if a in attributes:
            attributes[a].autocreate = True


def expand_objectclass(ldb, o):
    """look at auxiliary information from a class to intuit the existence of
    more classes needed for a minimal schema

    Every class named by the auxiliary-class, possible-superior and
    subClassOf attributes of `o` that is not yet in `objectclasses` is
    added to it.
    """
    attrs = ["auxiliaryClass", "systemAuxiliaryClass",
             "possSuperiors", "systemPossSuperiors",
             "subClassOf"]
    res = ldb.search(
        expression="(&(objectClass=classSchema)(ldapDisplayName=%s))" % o.name,
        base=rootDse["schemaNamingContext"], scope=SCOPE_SUBTREE,
        attrs=attrs)
    print("Expanding class %s\n" % o.name)
    assert len(res) == 1
    msg = res[0]
    # the loop variable must be the name used in the body
    for aname in attrs:
        if aname not in msg:
            continue
        values = msg[aname]
        if isinstance(values, str):
            values = [values]
        for name in values:
            if name not in objectclasses:
                print("Found new objectclass '%s'\n" % name)
                objectclasses[name] = Objectclass(ldb, name)


def add_objectclass_attributes(ldb, objectclass):
    """add the must and may attributes from an objectclass to the full list
    of attributes

    Every attribute named in the class's (system)MustContain and
    (system)MayContain properties that is not yet in `attributes` gets a
    new Attribute object there.
    """
    for aname in ("mustContain", "systemMustContain",
                  "mayContain", "systemMayContain"):
        # membership tests replace the deprecated has_key()
        if aname not in objectclass:
            continue
        alist = objectclass[aname]
        if isinstance(alist, str):
            alist = [alist]
        for a in alist:
            if a not in attributes:
                attributes[a] = Attribute(ldb, a)


def walk_dn(ldb, dn):
    """process an individual record, working out what attributes it has

    Every attribute present on the record that is not yet in `attributes`
    gets a new Attribute object there.  Search failures are reported on
    stdout and the record is skipped.
    """
    # get a list of all possible attributes for this object
    try:
        # Ldb.search() takes (base, scope, expression, attrs) in that
        # order; keywords keep each value on the right parameter.
        res = ldb.search(base=dn, scope=SCOPE_BASE,
                         expression="objectClass=*",
                         attrs=["allowedAttributes"])
    except LdbError as e:
        print("Unable to fetch allowedAttributes for '%s' - %r\n" % (dn, e))
        return
    if len(res) == 0 or "allowedAttributes" not in res[0]:
        # nothing to ask for on this record
        return
    allattrs = list(res[0]["allowedAttributes"])
    try:
        res = ldb.search(base=dn, scope=SCOPE_BASE,
                         expression="objectClass=*", attrs=allattrs)
    except LdbError as e:
        print("Unable to fetch all attributes for '%s' - %s\n" % (dn, e))
        return
    if len(res) == 0:
        return
    msg = res[0]
    for a in msg:
        # "dn" identifies the record; it is not a schema attribute
        if a == "dn":
            continue
        if a not in attributes:
            attributes[a] = Attribute(ldb, a)

def walk_naming_context(ldb, namingContext):
    """walk a naming context, looking for all records

    Every objectClass value found that is not yet in `objectclasses` is
    added to it, with the record's DN remembered as its exampleDN.  Each
    record is then passed to walk_dn().
    """
    try:
        # Keyword arguments keep each value on the right search()
        # parameter.  SCOPE_SUBTREE covers every record below the naming
        # context and, unlike SCOPE_DEFAULT, is imported by this file.
        res = ldb.search(base=namingContext, scope=SCOPE_SUBTREE,
                         expression="objectClass=*", attrs=["objectClass"])
    except LdbError as e:
        print("Unable to fetch objectClasses for '%s' - %s\n" % (namingContext, e))
        return
    for msg in res:
        if "objectClass" in msg:
            for objectClass in msg["objectClass"]:
                if objectClass not in objectclasses:
                    objectclasses[objectClass] = Objectclass(ldb, objectClass)
                    # create_testdn() splits this as a string
                    objectclasses[objectClass].exampleDN = str(msg.dn)
        walk_dn(ldb, msg.dn)

def trim_objectclass_attributes(ldb, objectclass):
    """trim the may attributes for an objectClass

    Rewrites, in place and only when present:
      possibleInferiors -- keeps only the classes found in `objectclasses`
      systemMayContain  -- duplicates removed, first occurrence kept
      mayContain        -- duplicates removed, first occurrence kept
    A single string value is treated as a one-element list.  `ldb` is
    unused.
    """
    # trim possibleInferiors,
    # include only the classes we extracted
    if "possibleInferiors" in objectclass:
        possinf = objectclass["possibleInferiors"]
        if isinstance(possinf, str):
            possinf = [possinf]
        objectclass["possibleInferiors"] = [
            x for x in possinf if x in objectclasses]

    # trim systemMayContain and mayContain,
    # remove duplicates
    for key in ("systemMayContain", "mayContain"):
        # only touch attributes the class actually has
        if key not in objectclass:
            continue
        values = objectclass[key]
        if isinstance(values, str):
            values = [values]
        unique = []
        for x in values:
            if x not in unique:
                unique.append(x)
        objectclass[key] = unique

def build_objectclass(ldb, name):
    """Return an Objectclass for `name`, or None when the schema has no
    classSchema entry with that lDAPDisplayName (a message is printed)."""
    query = "(&(objectClass=classSchema)(ldapDisplayName=%s))" % name
    try:
        found = ldb.search(
            expression=query,
            base=rootDse["schemaNamingContext"],
            scope=SCOPE_SUBTREE,
            attrs=["name"])
    except LdbError:
        # a failed search is reported the same way as an empty result
        found = []
    if len(found) == 0:
        print("unknown class '%s'\n" % name)
        return None
    return Objectclass(ldb, name)

def attribute_list(objectclass, attr1, attr2):
    """form a coalesced attribute list

    Returns a new list holding the values of `attr1` followed by those of
    `attr2`.  A single string counts as one value and a missing attribute
    contributes nothing.  Returns None when the objectclass has neither
    attribute, which is the case callers test for.
    """
    if attr1 not in objectclass and attr2 not in objectclass:
        return None
    merged = []
    for key in (attr1, attr2):
        if key not in objectclass:
            continue
        value = objectclass[key]
        if isinstance(value, str):
            merged.append(value)
        else:
            # any other iterable of values (list, tuple, ...)
            merged.extend(value)
    return merged

def aggregate_list(name, list):
    """write out a list in aggregate form

    Writes "<name> ( v1$ v2$ ... ) " to stdout.  Nothing is written when
    `list` is None or empty.
    """
    if not list:
        return
    # This is a fragment of a one-line schema definition, so write it
    # without the newline that print would add; the trailing space keeps it
    # apart from whatever follows.
    sys.stdout.write("%s ( %s ) " % (name, "$ ".join(list)))

def write_aggregate_objectclass(objectclass):
    """write the aggregate record for an objectclass

    Writes one "objectClasses: ( ... )" line to stdout, built from the
    class's governsID, name, subClassOf, objectClassCategory and its must
    and may attribute lists.
    """
    # The definition has to stay on one line, so the pieces are written
    # without the newline that print would add.
    out = sys.stdout.write

    # Schema properties are stored as keys; only `name` is an attribute.
    out("objectClasses: ( %s NAME '%s' " % (objectclass["governsID"],
                                            objectclass.name))
    # SUP is written only when the class has a superclass
    if "subClassOf" in objectclass:
        out("SUP %s " % objectclass["subClassOf"])

    # Compare as text: the value comes from a search result, not an int.
    # NOTE(review): assumes str() of the stored value gives the bare number.
    category = str(objectclass.get("objectClassCategory", ""))
    if category == "1":
        out("STRUCTURAL ")
    elif category == "2":
        out("ABSTRACT ")
    elif category == "3":
        out("AUXILIARY ")

    must = attribute_list(objectclass, "systemMustContain", "mustContain")
    aggregate_list("MUST", must)

    may = attribute_list(objectclass, "systemMayContain", "mayContain")
    aggregate_list("MAY", may)

    out(")\n")


def write_aggregate_ditcontentrule(objectclass):
    """write the aggregate record for a ditcontentrule

    Writes one "dITContentRules: ( ... )" line to stdout listing the
    class's auxiliary classes and the combined must and may attributes of
    those classes.  Nothing is written for a class without auxiliary
    classes.
    """
    aux = attribute_list(objectclass, "auxiliaryClass", "systemAuxiliaryClass")
    if not aux:
        return

    # The definition has to stay on one line, so the pieces are written
    # without the newline that print would add.
    out = sys.stdout.write

    # governsID is stored as a key; only `name` is an attribute
    out("dITContentRules: ( %s NAME '%s' " % (objectclass["governsID"],
                                              objectclass.name))

    aggregate_list("AUX", aux)

    # accumulate into real lists; None cannot be extended
    may_list = []
    must_list = []

    for c in aux:
        if c not in objectclasses:
            # auxiliary class that was not collected: nothing to merge
            continue
        may_list += attribute_list(objectclasses[c],
                                   "mayContain", "systemMayContain") or []
        must_list += attribute_list(objectclasses[c],
                                    "mustContain", "systemMustContain") or []

    # hand over None for an empty list so no empty "( )" group is written
    aggregate_list("MUST", must_list or None)
    aggregate_list("MAY", may_list or None)

    out(")\n")

def write_aggregate_attribute(attrib):
    """write the aggregate record for an attribute

    Writes one "attributeTypes: ( ... )" line to stdout, built from the
    attribute's attributeID, name, mapped attributeSyntax and its
    isSingleValued / systemOnly flags.
    """
    # The definition has to stay on one line, so the pieces are written
    # without the newline that print would add.
    out = sys.stdout.write

    # Schema properties are stored as keys; only `name` is an attribute.
    # NOTE(review): str() is assumed to give the bare value of a
    # single-valued element, so that the syntax-map lookup can match.
    out("attributeTypes: ( %s NAME '%s' SYNTAX '%s' " % (
        attrib["attributeID"], attrib.name,
        map_attribute_syntax(str(attrib["attributeSyntax"]))))
    # either flag may be absent from the schema entry
    if str(attrib.get("isSingleValued", "")) == "TRUE":
        out("SINGLE-VALUE ")
    if str(attrib.get("systemOnly", "")) == "TRUE":
        out("NO-USER-MODIFICATION ")

    out(")\n")


def write_aggregate():
    """write the aggregate record

    Writes the CN=Aggregate subschema entry to stdout as one LDIF record.
    With --dump-subschema-auto the objectClasses, attributeTypes and
    dITContentRules values of the collected schema are included.
    """
    # No blank line may appear before the record is complete: a blank line
    # ends an LDIF record.
    print("dn: CN=Aggregate,${SCHEMADN}")
    print("objectClass: top")
    print("objectClass: subSchema")
    print("objectCategory: CN=SubSchema,${SCHEMADN}")

    if opts.dump_subschema_auto:
        # The collections map names to objects and the writers need the
        # objects; plain sequences are accepted as well.
        if isinstance(objectclasses, dict):
            classes = list(objectclasses.values())
        else:
            classes = list(objectclasses)
        if isinstance(attributes, dict):
            attrs = list(attributes.values())
        else:
            attrs = list(attributes)

        for objectclass in classes:
            write_aggregate_objectclass(objectclass)
        for attr in attrs:
            write_aggregate_attribute(attr)
        for objectclass in classes:
            write_aggregate_ditcontentrule(objectclass)

    # the blank line that ends the record
    print("")

def load_list(file):
    """load a list from a file

    Returns the lines of the file named `file` with surrounding whitespace
    (including the line ending) removed.  Blank lines are skipped.
    """
    # `with` closes the file again; stripping keeps the line ending out of
    # the names, which are used verbatim in search filters.
    with open(file, 'r') as f:
        return [line.strip() for line in f if line.strip()]

# Read the rootDSE; it names the schema naming context that every schema
# lookup uses as its search base.
res = ldb.search(
    base="",
    expression="",
    scope=SCOPE_BASE,
    attrs=["schemaNamingContext"])
rootDse = res[0]

# Seed the objectclass table with the classes listed in <classfile>;
# names the schema does not know are left out.
classes = load_list(classfile)
for classname in classes:
    objectclass = build_objectclass(ldb, classname)
    if objectclass is None:
        continue
    objectclasses[classname] = objectclass


#
#  expand the objectclass list as needed
#
# number of classes expanded so far
expanded = 0

# expand_objectclass() may add new classes to objectclasses, so each pass
# works on a snapshot of the names that are still unexpanded.  Passes are
# repeated until one finds nothing left to do.
while True:
    pending = [n for n in list(objectclasses)
               if n not in objectclasses_expanded]
    if not pending:
        break
    for n in pending:
        expand_objectclass(ldb, objectclasses[n])
        objectclasses_expanded.add(n)
        expanded += 1

#
#  find objectclass properties
#
# objectclasses and attributes map names to objects.  The helpers below
# need the objects, so iterate over the values.  list() takes a snapshot
# in case a helper adds entries.
for objectclass in list(objectclasses.values()):
    find_objectclass_properties(ldb, objectclass)


#
#  form the full list of attributes
#
for objectclass in list(objectclasses.values()):
    add_objectclass_attributes(ldb, objectclass)

# and attribute properties
for attr in list(attributes.values()):
    find_attribute_properties(ldb, attr)

#
# trim the 'may' attribute lists to those really needed
#
for objectclass in list(objectclasses.values()):
    trim_objectclass_attributes(ldb, objectclass)

#
#  dump an ldif form of the attributes and objectclasses
#
if opts.dump_attributes:
    write_ldif(attributes, attrib_attrs)
if opts.dump_classes:
    write_ldif(objectclasses, class_attrs)
if opts.dump_subschema:
    write_aggregate()

# everything below is the --verbose report
if not opts.verbose:
    sys.exit(0)

#
#  dump list of objectclasses
#
print("objectClasses:\n")
for objectclass in objectclasses:
    print("\t%s\n" % objectclass)

print("attributes:\n")
for attr in attributes:
    print("\t%s\n" % attr)

print("autocreated attributes:\n")
for attr in attributes:
    # `attr` is the name; the autocreate flag lives on the stored object
    # and may never have been set
    if getattr(attributes[attr], "autocreate", False):
        print("\t%s\n" % attr)