Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
# | |
# Syntax: mkdoc.py [-o <path>] [-I<path> ..] [.. a list of header files ..]
# | |
# Extract documentation from C++ header files to use it in Python bindings | |
# | |
import os | |
import sys | |
import platform | |
import re | |
import textwrap | |
from clang import cindex | |
from clang.cindex import CursorKind | |
from collections import OrderedDict | |
from glob import glob | |
from threading import Thread, Semaphore | |
from multiprocessing import cpu_count | |
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a documentation entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling is not added to the name prefix of children.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]
# C++ operator token -> identifier-safe word used by sanitize_name().
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne',
    '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor',
    '<<=': 'ilshift', '>>=': 'irshift',
    '++': 'inc', '--': 'dec',
    '<<': 'lshift', '>>': 'rshift',
    '&&': 'land', '||': 'lor',
    '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor',
    '+': 'add', '-': 'sub', '*': 'mul', '/': 'div', '%': 'mod',
    '<': 'lt', '>': 'gt', '=': 'assign',
    '()': 'call',
}

# Longest tokens first, so that e.g. '<<=' is tried before '<<' and '<'.
# The sort is stable, so tokens of equal length keep their order above.
CPP_OPERATORS = OrderedDict(
    sorted(CPP_OPERATORS.items(),
           key=lambda item: len(item[0]),
           reverse=True))
# One semaphore slot per CPU; ExtractionThread takes a slot when it is
# created and gives it back when its run() finishes.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)
class NoFilenamesError(ValueError):
    """Raised when the argument list does not contain any file names."""
def d(s):
    """Return *s* unchanged if it is a ``str``, else decode it as UTF-8."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_*`` identifier.

    ``type-parameter-0-N`` placeholders become ``TN``, operator tokens are
    spelled out using CPP_OPERATORS, anything between ``<`` and ``>`` is
    dropped, and every remaining non-alphanumeric character becomes an
    underscore (runs are collapsed, a trailing one is removed).
    """
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator' + symbol, 'operator_' + word)
    cleaned = re.sub('<.*>', '', cleaned)
    cleaned = ''.join(ch if ch.isalnum() else '_' for ch in cleaned)
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('_$', '', cleaned)
    return '__doc_' + cleaned
def process_comment(comment):
    r"""Convert a raw C++ documentation comment into reflowed plain text.

    The comment delimiters (``/* .. */``, ``///`` and a leading ``*``) and
    the indentation shared by all non-empty lines are removed, a subset of
    Doxygen commands and HTML tags is rewritten into lightweight markup, and
    the prose is re-wrapped to 70 columns.  Text between ``\code`` and
    ``\endcode`` or inside ``<pre>`` is placed in a fenced block and is not
    re-wrapped.

    Parameters:
        comment: The comment text, including its comment delimiters.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    # Remove C++ comment syntax
    lines = []
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:]
            # A one-line block comment ("/** text */") carries both
            # delimiters on the same line, so drop the closing one too.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
            s = s.lstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        lines.append(s)

    # Strip the indentation common to all non-empty lines.
    leading_spaces = min(
        (len(s) - len(s.lstrip()) for s in lines if s), default=0)
    s = ''.join(line[leading_spaces:] + '\n' for line in lines)

    # Doxygen tags
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    # A leading '$' marks a section title for the re-flow step below.
    section_titles = {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws',
    }
    for in_, out_ in section_titles.items():
        # The word boundary keeps a short tag (e.g. "\throw") from matching
        # the head of a longer one ("\throws") and leaving a stray letter.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')

    # Re-flow text
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    result = ''
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            if not in_code_segment:
                result += '```\n'
            else:
                result += '\n```\n\n'
            in_code_segment = not in_code_segment
        elif in_code_segment:
            # Code is copied through without wrapping.
            result += x.strip()
        else:
            # Paragraphs are separated by two or more newlines.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # Section title: emit it without the marker and indent
                    # the paragraph that follows it.
                    result += wrapped[1:] + '\n'
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        result += wrapped + '\n\n'
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return result.rstrip().lstrip('\n')
def extract(filename, node, prefix, output):
    """Collect documentation entries for *node* and its descendants.

    Nodes located in a file other than *filename* are skipped.  For kinds in
    RECURSE_LIST the children are visited with an extended name prefix; for
    kinds in PRINT_LIST a ``(name, filename, comment)`` tuple is appended to
    *output* when the node has a non-empty spelling.

    Returns 0 when the node is skipped, otherwise None.
    """
    node_file = node.location.file
    if node_file is not None and \
            not os.path.samefile(d(node_file.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        child_prefix = prefix
        if kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment(d(raw) if raw is not None else '')
        if len(node.spelling) > 0:
            qualified = (prefix + '_' if prefix else '') + d(node.spelling)
            output.append((sanitize_name(qualified), filename, comment))
class ExtractionThread(Thread):
    """Thread that parses one file and appends its entries to *output*.

    A slot of the module-level ``job_semaphore`` is taken when the object
    is constructed and handed back when ``run`` finishes.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        # Blocks the creating thread while all slots are in use.
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and extract its documentation."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(raw_index)
            tu = index.parse(self.filename, self.parameters)
            extract(self.filename, tu.cursor, '', self.output)
        finally:
            # Hand the slot back even when parsing or extraction failed.
            job_semaphore.release()
def read_args(args):
    """Split command line arguments into clang parameters and file names.

    Default parameters (``-x c++``, ``-std=c++11``) are added unless the
    caller supplied their own, and platform specific libclang / include
    locations are configured when they can be found.  Every argument that
    starts with ``-`` is passed on as a parameter, as is the value that
    follows a separate ``-x``; all other arguments are file names.

    Parameters:
        args: Sequence of argument strings.

    Returns:
        A ``(parameters, filenames)`` tuple of lists.

    Raises:
        NoFilenamesError: If *args* contains no file names.
    """
    def version_key(path):
        # Numeric path components, so that e.g. "llvm-14" ranks above
        # "llvm-9".  Digits directly following "lib" ("lib64") are ignored.
        return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]

    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed; skip the option when there is none
            # instead of failing on an empty directory.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # LLVM packages install everything under /usr/lib/llvm-<version>/.
        # Glob for the library there and select the highest version; when
        # nothing matches, cindex's configuration is left untouched.
        candidates = glob("/usr/lib/llvm-*/lib/libclang.so")
        if candidates:
            cindex.Config.set_library_file(max(candidates, key=version_key))

        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=version_key)

        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    remaining = iter(args)
    for item in remaining:
        if item.startswith('-'):
            parameters.append(item)
            if item == '-x':
                # The language name following a separate "-x" belongs to
                # that option and must not be mistaken for a file name.
                language = next(remaining, None)
                if language is not None:
                    parameters.append(language)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
def extract_all(args):
    """Extract the documentation of every file named in *args*.

    One ExtractionThread is started per file; the call returns once all of
    them have finished.

    Parameters:
        args: Argument list as accepted by read_args().

    Returns:
        The list of entries collected by the extraction threads.

    Raises:
        NoFilenamesError: Propagated from read_args() when *args* names no
            files.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        # Constructing the thread blocks while all job slots are taken.
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the threads rather than draining the semaphore: each thread gives
    # its slot back when it finishes, so the semaphore is full again
    # afterwards and this function can be called more than once.
    for thr in threads:
        thr.join()

    return output
def write_header(comments, out_file=None):
    """Write the collected docstrings as a C++ header.

    Entries are written sorted by name, then file name.  When a name occurs
    more than once, the second and later occurrences get a ``_2``, ``_3``,
    ... suffix.

    Parameters:
        comments: Iterable of ``(name, filename, comment)`` tuples.
        out_file: Text stream to write to.  Defaults to the ``sys.stdout``
            in effect when the function is called.
    """
    # Resolve the default at call time so that a redirected sys.stdout is
    # honoured.
    if out_file is None:
        out_file = sys.stdout

    print('''/*
This file contains docstrings for the Python bindings.
Do not edit! These were automatically extracted by mkdoc.py
*/
#define __EXPAND(x) x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
#define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b) a ## b
#define __CAT2(a, b) __CAT1(a, b)
#define __DOC1(n1) __doc_##n1
#define __DOC2(n1, n2) __doc_##n1##_##n2
#define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Repeated name: number the extra occurrences.
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        # Multi-line comments start on their own line after the '='.
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
def mkdoc(args):
    """Extract docstrings for the given arguments and write the header.

    The first ``-o<path>`` or ``-o <path>`` argument selects the output
    file; without it the header is written to standard output.  All other
    arguments are handed to extract_all().

    Parameters:
        args: Iterable of command line argument strings.

    Raises:
        SystemExit: With code -1 when ``-o`` is given without a path.
        NoFilenamesError: Propagated from extract_all() when no input files
            are named.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            # Only the first -o is handled; the loop ends right after the
            # list is modified, so the iteration is not disturbed.
            del args[idx]
            try:
                # Either "-o<path>" or "-o" followed by the path, which has
                # moved to position idx after the deletion above.
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument", file=sys.stderr)
                # sys.exit instead of the site-provided exit(), which is
                # not always defined and closes stdin as a side effect.
                sys.exit(-1)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error, don't leave a partially-written
            # output file.
            try:
                os.unlink(out_path)
            except OSError:
                # Best effort: the file may never have been created.
                pass
            raise
    else:
        write_header(comments)
if __name__ == '__main__':
    # Script entry point: print a usage line when no header files are given.
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
        # sys.exit rather than the site-provided exit() helper, which is
        # meant for interactive use and is missing under "python -S".
        sys.exit(-1)