contrib/redox.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

#!/usr/bin/python
#
#  Copyright (c) 2008-2012 The Tor Project, Inc.
#  See LICENSE for licensing information.
#
# Hi!
# I'm redox.py, the Tor redocumentation tool!
# I am a horrible hack!
# I read the output of doxygen from stderr, and add missing DOCDOC comments
#   to tell you where documentation should go!
# To use me, edit the stuff below...
#  ...and run 'make doxygen 2>doxygen.stderr' ...
#  ...and run ./contrib/redox.py < doxygen.stderr !
# I'll make a bunch of new files by adding missing DOCDOC comments to your
#    source.  Those files will have names like ./src/common/util.c.newdoc.
# You will want to look over the changes by hand before checking them in.
#
# So, here's your workflow:
#
# 0. Make sure you're running a bourne shell for the redirects below.
# 1. make doxygen 1>doxygen.stdout 2>doxygen.stderr.
# 2. grep Warning doxygen.stderr | grep -v 'is not documented' | less
#      [This will tell you about all the bogus doxygen output you have]
# 3. python ./contrib/redox.py <doxygen.stderr
#      [This will make lots of .newdoc files with DOCDOC comments for
#       whatever was missing documentation.]
# 4. Look over those .newdoc files, and see which docdoc comments you
#     want to merge into the main file.  If it's all good, just run
#     "mv fname.c.newdoc fname.c".  Otherwise, you'll need to merge
#     the parts you like by hand.

# Which files should we ignore warnings from?  Mostly, these are external
# files that we've snarfed in from somebody else, whose C we do not intend
# to document for them.
SKIP_FILES = [
    "OpenBSD_malloc_Linux.c",
    "eventdns.c",
    "eventdns.h",
    "strlcat.c",
    "strlcpy.c",
    "sha256.c",
    "sha256.h",
    "aes.c",
    "aes.h",
]

# Regular expressions for names of things that never need a doc comment.
# They are compiled into SKIP_NAMES further down.
SKIP_NAME_PATTERNS = [
    r'^.*_c_id$',
    r'^.*_H_ID$',
]

# Kinds of things that should get a DOCDOC comment added when doxygen says
# they are missing documentation.  The recognized kinds are listed in KINDS
# below.
ADD_DOCDOCS_TO_TYPES = [
    'function',
    'type',
    'typedef',
]
# 'variable' is switched on by its own statement.
ADD_DOCDOCS_TO_TYPES.append('variable')
# ====================
# The rest of this should not need hacking.

import re
import sys

# Every kind of thing this script can tell apart.
KINDS = [
    "type",
    "field",
    "typedef",
    "define",
    "function",
    "variable",
    "enumeration",
]

# One "is not documented" line of doxygen output.  The groups are, in order:
# the file name, the line number, the message severity, and the description
# of the undocumented thing.
NODOC_LINE_RE = re.compile(
    r'^([^:]+):(\d+): (\w+): (.*) is not documented\.$'
)

# The description of an undocumented member.  The groups are, in order: the
# member's name, what kind of member it is, and whether it belongs to a file
# or to a class.
THING_RE = re.compile(
    r'^Member ([a-zA-Z0-9_]+).*\((typedef|define|function|variable|enumeration)\) of (file|class) '
)

# Compiled forms of SKIP_NAME_PATTERNS, in the same order.
SKIP_NAMES = list(map(re.compile, SKIP_NAME_PATTERNS))

def parsething(thing):
    """I figure out what 'foobar baz in quux quum is not documented' means,
       and return: the name of the foobar, and the kind of the foobar.

       A description starting with "Compound " names a type; its second
       word is the name.  Anything else has to match THING_RE.  Variables
       and functions that belong to a class are reported as kind 'field'.

       If I can't make sense of the description, I print a complaint and
       return (None, None).
    """
    if thing.startswith("Compound "):
        words = thing.split()
        # A bare "Compound" with no name after it is unparseable; fall
        # through to the complaint below instead of raising IndexError.
        if len(words) > 1:
            return words[1], "type"
    else:
        m = THING_RE.match(thing)
        if m:
            name, tp, parent = m.groups()
            if parent == 'class' and tp in ('variable', 'function'):
                tp = 'field'
            return name, tp

    # Single-argument print() produces the same output on Python 2 and 3.
    print("%s ???? Format didn't match." % thing)
    return None, None

def read():
    """I snarf doxygen stderr from stdin, and parse all the "foo is not
       documented" messages.  I return a map from filename to lists
       of tuples of (alleged line number, name of thing, kind of thing).

       Lines that don't look like such a message are ignored, and so are
       messages whose description parsething() can't understand.
    """
    errs = {}
    for text in sys.stdin:
        m = NODOC_LINE_RE.match(text)
        if not m:
            continue

        fname, lineno, severity, thing = m.groups()
        assert severity.lower() == 'warning'

        name, kind = parsething(thing)
        if name is None:
            # parsething() has already complained about this one.  Keeping
            # a None name around would only blow up later, when the name is
            # matched against the skip patterns.
            continue

        errs.setdefault(fname, []).append((int(lineno), name, kind))

    return errs

def findline(lines, lineno, ident):
    """Given a list of all the lines in the file (adjusted so 1-indexing works),
       a line number that ident is allegedly on, and ident, I figure out
       the line where ident was really declared.

       I search backwards from lineno towards line 1 and return the number
       of the first line that contains ident, or None if no line does.  If
       lineno is past the end of the file, I start from the last line.
    """
    # lines[0] is only a placeholder, so the last real line is len(lines)-1.
    start = min(lineno, len(lines) - 1)
    for candidate in range(start, 0, -1):
        if ident in lines[candidate]:
            return candidate

    return None

# A line that starts with an identifier immediately followed by "(" : the
# name line of a function whose return type sits on the line before it.
FUNC_PAT = re.compile(r"^[A-Za-z0-9_]+\(")

def hascomment(lines, lineno, kind):
    """I return true if it looks like there's already a good comment about
       the thing on lineno of lines of type kind.

       A comment counts if "*/" appears on the line just above lineno.  For
       a function whose name starts its line (see FUNC_PAT), "*/" two lines
       above counts as well.  Line numbers outside the file never match.
    """
    def ends_comment(idx):
        # Index 0 is a placeholder, and negative indexes would silently
        # wrap around to the end of the file, so only look at real lines.
        return 1 <= idx < len(lines) and "*/" in lines[idx]

    if ends_comment(lineno - 1):
        return True
    if (kind == 'function' and 1 <= lineno < len(lines)
            and FUNC_PAT.match(lines[lineno])):
        return ends_comment(lineno - 2)
    return False

def hasdocdoc(lines, lineno, kind):
    """I return true if it looks like there's already a docdoc comment about
       the thing on lineno of lines of type kind.

       "DOCDOC" counts if it appears on lineno itself or on the line just
       above.  For a function whose name starts its line (see FUNC_PAT),
       "DOCDOC" two lines above counts as well.  Line numbers outside the
       file never match.
    """
    def mentions_docdoc(idx):
        # Index 0 is a placeholder, and negative indexes would silently
        # wrap around to the end of the file, so only look at real lines.
        return 1 <= idx < len(lines) and "DOCDOC" in lines[idx]

    if mentions_docdoc(lineno) or mentions_docdoc(lineno - 1):
        return True
    if (kind == 'function' and 1 <= lineno < len(lines)
            and FUNC_PAT.match(lines[lineno])):
        return mentions_docdoc(lineno - 2)
    return False

def checkf(fn, errs):
    """I go through the output of read() for a single file, and build a list
       of tuples of things that want DOCDOC comments.  Each tuple has:
       the line number where the comment goes; the kind of thing; its name.

       fn is the file's name and errs is its list of (alleged line number,
       name, kind) tuples.  I return None, without looking at errs, if fn
       ends with one of SKIP_FILES or can't be opened.
    """
    if any(fn.endswith(skip) for skip in SKIP_FILES):
        print("Skipping %s" % fn)
        return None

    # The leading None makes the list 1-indexed, like line numbers are.
    lines = [None]
    try:
        with open(fn, 'r') as srcfile:
            lines.extend(srcfile.readlines())
    except IOError:
        return None

    comments = []
    for line, name, kind in errs:
        if name is None:
            # A thing whose description could not be parsed.
            continue

        if any(pat.match(name) for pat in SKIP_NAMES):
            continue

        if kind not in ADD_DOCDOCS_TO_TYPES:
            continue

        ln = findline(lines, line, name)
        if ln is None:
            print("Couldn't find the definition of %s allegedly on %s of %s"
                  % (name, line, fn))
            continue

        # Look for an existing DOCDOC near the alleged line and also near
        # the line where the name was really found: the new comment goes
        # next to the latter, so that is where one from an earlier run
        # would be.  The probe reads lines ln-1 and ln-2, and lines[0] is
        # only a placeholder, hence ln > 2.
        if hasdocdoc(lines, line, kind):
            continue
        if ln != line and ln > 2 and hasdocdoc(lines, ln, kind):
            continue

        if kind == 'function' and FUNC_PAT.match(lines[ln]):
            # The name starts its line, so the return type is on the line
            # above; the comment belongs above that.
            ln = ln - 1

        comments.append((ln, kind, name))

    return comments

def applyComments(fn, entries):
    """I apply lots of comments to the file in fn, making a new .newdoc file.

       entries is a list of (line number, kind, name) tuples.  For each one
       I insert a "/* DOCDOC name */" line in front of that line number.
       The result is written to fn + ".newdoc"; fn itself is not modified,
       and neither is entries.  If fn can't be read I do nothing.
    """
    # The leading None makes the list 1-indexed, like line numbers are.
    lines = [None]
    try:
        with open(fn, 'r') as inf:
            lines.extend(inf.readlines())
    except IOError:
        return

    # Process the comments in reverse order by line number, so that
    # the line numbers for the ones we haven't added yet remain valid
    # until we add them.  Standard trick.
    added = 0
    for ln, kind, name in sorted(entries, reverse=True):
        lines.insert(ln, "/* DOCDOC %s */\n" % name)
        added += 1

    with open(fn + ".newdoc", 'w') as outf:
        outf.writelines(lines[1:])

    print("Added %s DOCDOCs to %s" % (added, fn))

# Parse doxygen's complaints from stdin, then write a .newdoc file for every
# source file that needs at least one DOCDOC comment.
e = read()

# items() exists on both Python 2 and 3; sorting makes the order in which
# files are processed (and reported) reproducible.
for fn, errs in sorted(e.items()):
    comments = checkf(fn, errs)
    if comments:
        applyComments(fn, comments)