"""Scan PyQt4 HTML reference pages and write a ``codeintel`` XML catalog.

Each module page listed in ``MODULES`` is read from a documentation
directory, the classes it links to are scanned in turn, and the collected
functions and enum constants are serialised under a ``codeintel`` root
element.

NOTE(review): in the copy of this file that was reviewed, every piece of
HTML markup inside the regular expressions and string literals had been
stripped (named groups read ``(?P.+?)``, which ``re`` rejects).  The group
names below are taken from the names the code reads; the surrounding tags
are a reconstruction and MUST be checked against the original patterns or
the actual documentation pages before the output is trusted.
"""

import os.path
import re
import sys
import xml.etree.ElementTree as ET

# TODO: module members
# TODO: inheritance between modules?

MODULES = ["qtcore", "qtgui", "qthelp", "qtnetwork", "qtopengl", "qtscript",
           "qtscripttools", "qtsql", "qtsvg", "qttest", "qtwebkit", "qtxml",
           "qtxmlpatterns", "phonon-module", "qtassistant", "qtdesigner"]

# NOTE(review): reconstructed -- SOURCE showed a single blank-looking
# character here; a plain space cannot be right because several patterns
# below rely on literal spaces ("class ", " Class Reference").
NBSP_ENTITY = "&#160;"

# --- Page-level patterns (markup reconstructed, see module docstring) -------
MODULE_NAME_RE = re.compile(r"""<h1[^>]*>(.+?)Module</h1>""")
MODULE_DESC_RE = re.compile(r"""Module</h1><p>(.+?)\..+?</p>""")
SECTION_RE = re.compile(
    r"""<h3>(?P<name>[^<]+?)</h3><ul>(?P<els>.+?)</ul>""")
SECTION_EL_RE = re.compile(r"""<li>(?P<el>.+?)</li>""")
SECTION_EL_DETAILS_RE = re.compile(
    r"""(?P<return_type>.*?)<b><a href="[^"]*#(?P<anhor>[^"]+)">"""
    r"""(?P<name>.+?)</a></b>(?P<args>.+)""")

# --- Argument patterns ------------------------------------------------------
# NOTE(review): group name "default" is a guess; this pattern is not used
# anywhere in the visible code.
ARG_DEFAULT_RE = re.compile(r"""(.+?)=(?P<default>.+)""")
TYPE_ARG_RE = re.compile(r"""(?P<type>.+?)<i>(?P<name>.+?)</i>""")
ARG_RE = re.compile(r"""<i>(?P<name>(.)+?)</i>""")
ARG_NOI_RE = re.compile(r"""(?P<name>[\w.]+)""")

# --- Class patterns ---------------------------------------------------------
CLASS_IN_MOD_RE = re.compile(
    r"""class <b><a href="(?P<url>[^"]+?)\.html">(?P<name>.+?)</a>""")
CLASS_NAME_RE = re.compile(r"""<h1[^>]*>(?P<name>.+?) Class Reference""")
INHERITS_RE = re.compile(r"""<p>Inherits(.+?)</p>""")
INHERITS_ONE_RE = re.compile(r"""">(.+?)</a>""")

HTML_TAG_RE = re.compile(r'<.*?>')

ENUM_RE = re.compile(
    r"""<tt>(.+?\.){0,1}(?P<name>.+?)</tt>.+?<tt>(?P<value>.+?)</tt>"""
    r""".+?">(?P<description>.+?)</td>""")

# Template for the heading of a member's detailed description; ``%s`` is the
# (escaped) anchor name.  Group 1 is the opening ``<p>`` plus the paragraph
# text, so a length of exactly 3 means the paragraph is empty.
# NOTE(review): reconstructed; SOURCE only showed ``.+?`` and an empty group.
ONE_LINE_DOC_TEMPLATE = r"""<a name="%s" />.+?</h3>(<p>.*?)</p>"""

# Sections whose entries have no implicit ``self`` argument to drop.
STATIC_SECTIONS = ("Module Functions", "Static Methods", "Qt Signals")
# Sections whose entries start with ``self``.
BOUND_SECTIONS = ("Methods", "Special Methods")

DEFAULT_DOC_DIR = (
    r"""C:\Users\wuub\Desktop\py\PyQt-win-gpl-4.5.2-snapshot-20090703\doc\html""")
DEFAULT_OUTPUT = "PyQt.xml"


def strip_html(s):
    """Return *s* with everything matched by ``HTML_TAG_RE`` removed."""
    return HTML_TAG_RE.sub('', s)


def _build_element(parent, tag, node):
    """Append a *tag* child to *parent* describing *node* and return it.

    Only truthy attributes named by ``node.dump_attrs()`` are written; the
    node's ``subelements`` are serialised recursively beneath the new child.
    """
    element = ET.SubElement(parent, tag)
    for attr_name in node.dump_attrs():
        value = getattr(node, attr_name)
        if value:
            element.attrib[attr_name] = value
    for child in node.subelements:
        child.et(element)
    return element


class CiRoot(object):
    """Top-level ``codeintel`` element holding a list of blobs."""

    def __init__(self, *blobs):
        self.blobs = list(blobs)

    def et(self):
        """Build and return the ``codeintel`` element tree root."""
        ci = ET.Element("codeintel",
                        description="PyQt4.5 done right :) [ci(a)wuub.net]",
                        name="PyQt4.5-wuub",
                        version="2.0")
        for blob in self.blobs:
            blob.et(ci)
        return ci


class CiVariable(object):
    """A ``variable`` element (used here for enum constants)."""

    def __init__(self, name):
        self.name = name
        self.ilk = None
        self.line = None
        self.attributes = None
        self.doc = None
        self.citdl = None
        self.subelements = []

    def et(self, parent):
        """Serialise this variable as a child of *parent*; return the child."""
        return _build_element(parent, "variable", self)

    def dump_attrs(self):
        """Return the attribute names written to the XML element."""
        return ["ilk", "name", "line", "attributes", "doc", "citdl"]


class CiScope(object):
    """A ``scope`` element; base for blobs, classes and functions."""

    def __init__(self, ilk, name):
        self.ilk = ilk
        self.name = name
        self.line = None
        self.lineend = None
        self.attributes = None
        self.signature = None
        self.doc = None
        self.subelements = []
        # name -> first CiFunction registered under that name
        self.functions = {}

    def et(self, parent):
        """Serialise this scope as a child of *parent*; return the child."""
        return _build_element(parent, "scope", self)

    def dump_attrs(self):
        """Return the attribute names written to the XML element."""
        return ["ilk", "name", "line", "lineend", "attributes", "signature",
                "doc"]

    def add_function(self, function):
        """Register *function*, folding overloads into one entry.

        The first function seen under a name becomes a sub-element.  Later
        functions with the same name only contribute their signature, which
        is appended on a new line to the existing entry.
        """
        existing = self.functions.get(function.name)
        if existing is not None:
            existing.signature += "\n" + function.signature
            return
        self.functions[function.name] = function
        self.subelements.append(function)


class CiBlob(CiScope):
    """A module-level scope, wrapped in a ``file`` element when serialised."""

    def __init__(self, name, lang):
        super(CiBlob, self).__init__("blob", name)
        self.lang = lang
        self.src = None

    def et(self, parent):
        """Serialise as ``<file><scope .../></file>`` under *parent*.

        The scope is emitted under the name ``PyQt4.<name>``; ``self.name``
        is restored afterwards, even if serialisation raises.
        """
        stripped = self.name.strip()
        fet = ET.SubElement(parent, "file", lang=self.lang, mtime="0",
                            path=stripped + ".pyd")
        old_name = self.name
        self.name = "PyQt4." + stripped
        try:
            return super(CiBlob, self).et(fet)
        finally:
            self.name = old_name

    def dump_attrs(self):
        """Return the scope attribute names plus ``lang`` and ``src``."""
        return super(CiBlob, self).dump_attrs() + ["lang", "src"]


class CiClass(CiScope):
    """A class scope."""

    def __init__(self, name):
        super(CiClass, self).__init__("class", name)
        self.classrefs = None
        self.interfaces = None
        self.mixinrefs = None

    def dump_attrs(self):
        """Return the scope attribute names plus the class reference ones."""
        return super(CiClass, self).dump_attrs() + ["classrefs", "interfaces",
                                                    "mixinrefs"]


class CiFunction(CiScope):
    """A function scope."""

    def __init__(self, name):
        super(CiFunction, self).__init__("function", name)
        self.returns = None
        self.arguments = []

    def dump_attrs(self):
        """Return the scope attribute names plus ``returns``."""
        return super(CiFunction, self).dump_attrs() + ["returns"]


class PyQTDocScanner(object):
    """Builds ``Ci*`` objects from the HTML pages found in *doc_dir*."""

    def __init__(self, doc_dir):
        self.doc_dir = doc_dir

    def _read_page(self, fname):
        """Return the text of ``<doc_dir>/<fname>.html`` without nbsp entities."""
        filename = os.path.join(self.doc_dir, fname + ".html")
        with open(filename, "r") as f:
            return f.read().replace(NBSP_ENTITY, "")

    def scan_module(self, fname):
        """Scan module page *fname* and return the resulting ``CiBlob``.

        Every class page matched by ``CLASS_IN_MOD_RE`` is scanned as well.
        Raises ``AttributeError`` when the module name or description
        pattern does not match the page.
        """
        txt = self._read_page(fname)
        module = CiBlob(MODULE_NAME_RE.search(txt).group(1), "Python")
        module.doc = MODULE_DESC_RE.search(txt).group(1)
        for class_match in CLASS_IN_MOD_RE.finditer(txt):
            module.subelements.append(
                self.scan_class(class_match.group("url")))
        self.scan_ci_elements(module, txt)
        return module

    def scan_class(self, fname):
        """Scan class page *fname* and return the resulting ``CiClass``."""
        txt = self._read_page(fname)
        cls = CiClass(CLASS_NAME_RE.search(txt).group(1))
        cls.classrefs = self.scan_inheritance(txt)
        self.scan_ci_elements(cls, txt)
        return cls

    def scan_inheritance(self, txt):
        """Return the space-separated base class names, or None if absent."""
        inherits = INHERITS_RE.search(txt)
        if not inherits:
            return None
        return " ".join(INHERITS_ONE_RE.findall(inherits.group(1)))

    def scan_ci_elements(self, ci, mod_txt):
        """Add the enums and functions found in *mod_txt* to scope *ci*."""
        self.scan_enums(ci, mod_txt)
        for section in SECTION_RE.finditer(mod_txt):
            section_name = section.group('name')
            if section_name in STATIC_SECTIONS:
                is_static = True
            elif section_name in BOUND_SECTIONS:
                is_static = False
            else:
                # Sections outside the two lists contribute no functions.
                continue
            for elem in SECTION_EL_RE.finditer(section.group('els')):
                ci.add_function(self.scan_function(
                    elem.group(1), mod_txt, is_static, section_name))

    def scan_enums(self, ci, mod_txt):
        """Append one constant ``CiVariable`` per ``ENUM_RE`` match to *ci*."""
        for enum_match in ENUM_RE.finditer(mod_txt):
            d = enum_match.groupdict()
            v = CiVariable(d["name"])
            v.attributes = "constant"
            v.citdl = "int"
            v.doc = "-> %s\n%s" % (d["value"], strip_html(d["description"]))
            ci.subelements.append(v)

    def scan_function(self, el_txt, doc_txt, is_static, section_name):
        """Build a ``CiFunction`` from one section entry.

        *el_txt* is the entry's markup, *doc_txt* the whole page (searched
        for the one-line description), *is_static* tells whether the first
        argument is kept, and entries of the "Qt Signals" section get
        " (signal)" appended to their signature.
        """
        d = SECTION_EL_DETAILS_RE.search(el_txt).groupdict()
        cif = CiFunction(d["name"])
        # NOTE(review): SOURCE applied two literal replace() calls here whose
        # targets were lost (both read as ""); all tags are stripped instead.
        cif.returns = strip_html(d["return_type"]).strip()
        args = self.figure_args(d["args"].strip(), is_static)
        (cif.signature, cif.doc) = self.create_sig(cif.name, args, cif.returns)
        if section_name == "Qt Signals":
            cif.signature += " (signal)"
        one_liner = self.scan_one_line_doc(d["anhor"], doc_txt)
        if one_liner:
            if cif.doc:
                cif.doc += "\n"
            cif.doc += one_liner
        return cif

    def scan_one_line_doc(self, anhor_name, txt):
        """Return the first sentence documented under anchor *anhor_name*.

        Returns "" when the anchor is not found or its paragraph is empty.
        """
        # Escape the name so that it is matched literally.
        pattern = ONE_LINE_DOC_TEMPLATE % re.escape(anhor_name.strip())
        match = re.search(pattern, txt)
        if not match or len(match.group(1)) == 3:
            return ""
        return strip_html(match.group(1)).split(".")[0] + "."

    def create_sig(self, name, args, returns):
        """Return ``(signature, doc)`` for a function.

        *args* is a sequence of ``(type, name, default, found)`` tuples as
        produced by ``figure_args``.  The signature lists argument names
        (with defaults) and, unless *returns* is empty or starts with
        "void", ends in ``-> returns``.  The doc string lists
        ``"type name"`` for every argument that has a type.
        """
        sig_parts = []
        doc_parts = []
        for arg_type, arg_name, arg_default, _ in args:
            single = arg_name
            if arg_default:
                single += " = " + arg_default
            if arg_type:
                doc_parts.append("%s %s" % (arg_type, arg_name))
            sig_parts.append(single)
        signature = "%s(%s)" % (name, ", ".join(sig_parts))
        if returns and not returns.startswith("void"):
            signature += " -> " + returns
        return signature, ", ".join(doc_parts)

    def figure_args(self, args_with_html, is_static):
        """Parse a parenthesised argument list into argument tuples.

        The first and last characters (the parentheses) are dropped, the
        rest is split on commas, and the first argument is discarded unless
        *is_static* is true.  Returns a list of
        ``(type, name, default, found)`` tuples, where *found* tells whether
        any of the argument patterns matched.
        """
        args = []
        parts = args_with_html[1:-1].replace(NBSP_ENTITY, "").split(",")
        if not is_static:
            parts = parts[1:]
        for part in parts:
            if not part.strip():
                continue
            # Pad with "" so that an argument without "=" has no default.
            (targ, default) = (
                [piece.strip() for piece in part.split("=")] + [""])[:2]
            arg_type = ""
            arg_name = targ
            found = False
            for rexp in (TYPE_ARG_RE, ARG_RE, ARG_NOI_RE):
                m = rexp.match(targ)
                if not m:
                    continue
                groups = m.groupdict()
                arg_type = groups.get("type", "")
                arg_name = groups.get("name", "")
                found = True
                break
            args.append((arg_type, arg_name, default, found))
        return args


def main(doc_dir=DEFAULT_DOC_DIR, output=DEFAULT_OUTPUT):
    """Scan every module in ``MODULES`` under *doc_dir* and write *output*."""
    scanner = PyQTDocScanner(doc_dir)
    root = CiRoot()
    for mod_name in MODULES:
        root.blobs.append(scanner.scan_module(mod_name))
    # Binary mode: ElementTree emits encoded bytes.  ElementTree also writes
    # the XML declaration.
    # NOTE(review): SOURCE wrote a literal "\n" before the tree, which looks
    # like an XML declaration whose markup was stripped -- confirm.
    with open(output, "wb") as f:
        ET.ElementTree(root.et()).write(f, encoding="UTF-8",
                                        xml_declaration=True)


if __name__ == "__main__":
    # Optional arguments: documentation directory, then output file.
    main(*sys.argv[1:3])