File: Synopsis/Parsers/Python/SXRGenerator.py

import parser
import token
import tokenize
import symbol
import keyword

HAVE_ENCODING_DECL = hasattr(symbol, "encoding_decl")
HAVE_IMPORT_NAME = hasattr(symbol, "import_name")
HAVE_DECORATOR = hasattr(symbol, "decorator")


def num_tokens(ptree):
    """Count the number of leaf tokens in the given ptree."""

    if type(ptree) == str: return 1
    else: return sum([num_tokens(n) for n in ptree[1:]])

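# A quick illustration of the shape num_tokens walks: parser.ast2tuple yields
# nested tuples whose leaf nodes are (token, string) pairs, so num_tokens
# counts the terminal tokens of a fragment.  For example:
#
#   tree = parser.ast2tuple(parser.suite('x = 1\n'))
#   num_tokens(tree)   # counts NAME 'x', EQUAL, NUMBER '1', NEWLINE, ENDMARKER

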
class LexerDebugger:

    def __init__(self, lexer):

        self.lexer = lexer

    def next(self):

        n = self.lexer.next()
        print 'next is "%s" (%s)'%(n[1], n[0])
        return n


header="""<sxr filename="%(filename)s">
<line>"""

trailer="""</line>
</sxr>
"""

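# Every physical source line is wrapped in a <line> element between header and
# trailer; the generated markup looks roughly like this (sketch only, file and
# names made up for illustration):
#
#   <sxr filename="example.py">
#   <line><span class="py-keyword">def</span> <a href="example.foo" from="example" type="definition">foo</a>():</line>
#   ...
#   </line>
#   </sxr>

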
def escape(text):

    for p in [('&', '&amp;'), ('"', '&quot;'), ('<', '&lt;'), ('>', '&gt;'),]:
        text = text.replace(*p)
    return text

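# For example (hypothetical input, shown for illustration):
#
#   escape('a < b & "c"')   ->   'a &lt; b &amp; &quot;c&quot;'

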
class SXRGenerator:
    """Generate an SXR (source cross-reference) document for a Python source file."""

    def __init__(self):
        """Set up the mapping from parse-tree node types to handler methods."""

        self.handlers = {}
        self.handlers[token.ENDMARKER] = self.handle_end_marker
        self.handlers[token.NEWLINE] = self.handle_newline
        self.handlers[token.INDENT] = self.handle_indent
        self.handlers[token.DEDENT] = self.handle_dedent
        self.handlers[token.STRING] = self.handle_string
        self.handlers[symbol.funcdef] = self.handle_function
        self.handlers[symbol.parameters] = self.handle_parameters
        self.handlers[symbol.classdef] = self.handle_class
        self.handlers[token.NAME] = self.handle_name
        self.handlers[symbol.expr_stmt] = self.handle_expr_stmt
        self.handlers[symbol.power] = self.handle_power
        if HAVE_ENCODING_DECL:
            self.handlers[symbol.encoding_decl] = self.handle_encoding_decl
        if HAVE_IMPORT_NAME:
            self.handlers[symbol.import_as_names] = self.handle_import_as_names
            self.handlers[symbol.dotted_as_names] = self.handle_dotted_as_names
            self.handlers[symbol.import_from] = self.handle_import_from
            self.handlers[symbol.import_name] = self.handle_import_name
        else:
            self.handlers[symbol.import_stmt] = self.handle_import
        if HAVE_DECORATOR:
            self.handlers[symbol.decorator] = self.handle_decorator

        self.col = 0
        self.lineno = 1
        self.parameters = []
        self.scopes = []


    def process_file(self, scope, sourcefile, sxr):

        self.scopes = list(scope)
        input = open(sourcefile.abs_name, 'r+')
        src = input.readlines()
        # Width (in digits) of the largest line number, used for padding.
        self.lines = len(`len(src) + 1`)
        ptree = parser.ast2tuple(parser.suite(''.join(src)))
        input.seek(0)
        self.lexer = tokenize.generate_tokens(input.readline)

        self.sxr = open(sxr, 'w+')
        lineno_template = '%%%ds' % self.lines
        lineno = lineno_template % self.lineno
        self.sxr.write(header % {'filename': sourcefile.name})
        try:
            self.handle(ptree)
        except StopIteration:
            raise
        self.sxr.write(trailer)
        self.sxr.close()
        self.scopes.pop()


    def handle(self, ptree):

        if type(ptree) == tuple:
            kind = ptree[0]
            value = ptree[1:]
            handler = self.handlers.get(kind, self.default_handler)
            handler(value)
        else:
            raise Exception("Process error: Type is not a tuple %s" % str(ptree))


    def default_handler(self, ptree):

        for node in ptree:
            if type(node) == tuple: self.handle(node)
            elif type(node) == str: self.handle_token(node)
            else: raise Exception("Invalid ptree node")


    def next_token(self):
        """Return the next visible token.
        Process tokens that are not part of the parse tree silently."""

        t = self.lexer.next()
        while t[0] in [tokenize.NL, tokenize.COMMENT]:
            if t[0] is tokenize.NL:
                self.print_newline()
            elif t[0] is tokenize.COMMENT:
                self.print_token(t)
                if t[1][-1] == '\n': self.print_newline()
            t = self.lexer.next()
        return t


    def handle_token(self, item = None):

        t = self.next_token()
        if item is not None and t[1] != item:
            raise Exception('Internal error in line %d: expected "%s", got "%s" (%d)'
                            % (self.lineno, item, t[1], t[0]))
        else:
            self.print_token(t)


    def handle_name_as_xref(self, xref, name, from_ = None, type = None):

        kind, value, (srow, scol), (erow, ecol), line = self.next_token()
        if (kind, value) != (token.NAME, name):
            raise Exception('Internal error in line %d: expected name "%s", got "%s" (%d)'
                            % (self.lineno, name, value, kind))

        if self.col != scol:
            self.sxr.write(' ' * (scol - self.col))
        attrs = []
        if from_: attrs.append('from="%s"'%from_)
        if type: attrs.append('type="%s"'%type)
        a = '<a href="%s" %s>%s</a>'%('.'.join(xref), ' '.join(attrs), value)
        self.sxr.write(a)
        self.col = ecol


    def handle_tokens(self, ptree):

        tokens = num_tokens(ptree)
        for i in xrange(tokens):
            self.handle_token()


    def handle_end_marker(self, nodes): pass


    def handle_newline(self, nodes):

        self.handle_token()


    def handle_indent(self, indent):

        self.handle_token()


    def handle_dedent(self, dedent):

        self.handle_token()


    def handle_string(self, content):

        self.handle_token()


    def handle_function(self, nodes):

        if HAVE_DECORATOR:
            if nodes[0][0] == symbol.decorators:
                offset = 1
                self.handle(nodes[0])
            else:
                offset = 0
        else:
            offset = 0

        def_token = nodes[0 + offset]
        self.handle_token(def_token[1])
        name = nodes[1 + offset][1]
        qname = tuple(self.scopes + [name])
        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
        self.handle(nodes[2 + offset])
        colon_token = nodes[3 + offset]
        self.handle_token(colon_token[1])
        body = nodes[4 + offset]
        self.handle_tokens(body)


    def handle_parameters(self, nodes):

        self.handle_token(nodes[0][1])
        if nodes[1][0] == symbol.varargslist:
            args = list(nodes[1][1:])
            while args:
                if args[0][0] == token.COMMA:
                    self.handle_token(args[0][1])
                elif args[0][0] == symbol.fpdef:
                    self.handle_tokens(args[0])
                elif args[0][0] == token.EQUAL:
                    self.handle_token(args[0][1])
                    del args[0]
                    self.handle_tokens(args[0])
                elif args[0][0] == token.DOUBLESTAR:
                    self.handle_token(args[0][1])
                    del args[0]
                    self.handle_token(args[0][1])
                elif args[0][0] == token.STAR:
                    self.handle_token(args[0][1])
                    del args[0]
                    self.handle_token(args[0][1])
                else:
                    print "Unknown symbol:",args[0]
                del args[0]
        self.handle_token(nodes[-1][1])

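    # For reference, the varargslist walked above looks roughly like this for
    # 'def f(a, b=1, *args, **kw)' (sketch of the parse-tree children, not
    # verbatim output):
    #
    #   fpdef(a)  COMMA  fpdef(b)  EQUAL  test(1)  COMMA
    #   STAR  NAME(args)  COMMA  DOUBLESTAR  NAME(kw)

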
    def handle_class(self, nodes):

        class_token = nodes[0]
        self.handle_token(class_token[1])
        name = nodes[1][1]
        qname = tuple(self.scopes + [name])
        self.handle_name_as_xref(qname, name, from_='.'.join(self.scopes), type='definition')
        base_clause = nodes[2][0] == token.LPAR and nodes[3] or None
        self.handle_tokens(nodes[2])
        bases = []
        if base_clause:
            self.handle_tokens(base_clause)
            self.handle_token(')')
            self.handle_token(':')
            body = nodes[6]
        else:
            body = nodes[3]
        self.scopes.append(name)
        self.handle(body)
        self.scopes.pop()


    def handle_name(self, content):

        self.handle_token(content[0])


    def handle_expr_stmt(self, nodes):

        for n in nodes: self.handle_tokens(n)


    def handle_dotted_name(self, dname, rest):

        self.handle_token(dname[0])
        for name in dname[1:]:
            self.handle_token('.')
            self.handle_token(name)
        map(self.handle, rest)


    def handle_op(self, nodes): pass


    def handle_power(self, content):

        def get_dotted_name(content):
            if content[0][0] != symbol.atom or content[0][1][0] != token.NAME:
                return None
            dotted_name = [content[0][1][1]]
            i = 1
            for param in content[1:]:
                if param[0] != symbol.trailer: break
                if param[1][0] != token.DOT: break
                if param[2][0] != token.NAME: break
                dotted_name.append(param[2][1])
                i += 1
            if i < len(content): return dotted_name, content[i:]
            else: return dotted_name, []

        name = get_dotted_name(content)
        if name: self.handle_dotted_name(*name)
        else: map(self.handle, content)

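    # Sketch of the node shape get_dotted_name expects: for 'a.b.c(x)' the
    # power node's children are roughly
    #
    #   atom(NAME 'a')  trailer(DOT NAME 'b')  trailer(DOT NAME 'c')
    #   trailer(LPAR ... RPAR)
    #
    # so it returns (['a', 'b', 'c'], [the call trailer]).

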
    def handle_encoding_decl(self, nodes):

        for n in nodes[:-1]: self.handle(n)


    def handle_import_as_names(self, nodes):

        for n in nodes: self.handle(n)


    def handle_dotted_as_names(self, nodes):

        for n in nodes: self.handle(n)


    def handle_import_from(self, nodes):

        self.handle_token('from')
        self.handle(nodes[1])
        self.handle_token('import')
        self.handle(nodes[3])


    def handle_import_name(self, nodes):

        self.handle_token('import')
        self.handle_dotted_as_names(nodes[1][1:])


    def handle_import(self, nodes):

        for n in nodes: self.handle(n)


    def handle_decorator(self, nodes):

        # Consume the decorator's tokens ('@', dotted name, optional argument
        # list, trailing NEWLINE) so the lexer stays in sync with the ptree.
        for n in nodes: self.handle_tokens(n)


    def print_token(self, t):

        kind, value, (srow, scol), (erow, ecol), line = t
        if kind == token.NEWLINE:
            self.print_newline()
        else:
            if self.col != scol:
                self.sxr.write(' ' * (scol - self.col))
            if keyword.iskeyword(value):
                format = '<span class="py-keyword">%s</span>'
            elif kind == token.STRING:
                format = '<span class="py-string">%s</span>'
                chunks = value.split('\n')
                for c in chunks[:-1]:
                    self.sxr.write(format % escape(c))
                    self.print_newline()
                value = chunks[-1]
            elif kind == tokenize.COMMENT:
                format = '<span class="py-comment">%s</span>'
                if value[-1] == '\n': value = value[:-1]
            else:
                format = '%s'

            self.sxr.write(format % escape(value))
            self.col = ecol


    def print_newline(self):

        self.col = 0
        self.lineno += 1
        self.sxr.write('</line>\n')
        self.sxr.write('<line>')

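# A minimal usage sketch, assuming a stand-in for Synopsis' SourceFile objects
# (which provide 'name' and 'abs_name' attributes); the names below are made
# up for illustration:
#
#   class _File(object):
#       def __init__(self, name): self.name = self.abs_name = name
#
#   generator = SXRGenerator()
#   generator.process_file(['MyModule'], _File('MyModule.py'), 'MyModule.py.sxr')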