1
2
3
4
5
6
7
8
9
10 from __future__ import generators
11
12
13
14
15
16
# Token names exported to PLY.  Every name here is expected to have a
# matching t_<NAME> rule in this module.
tokens = (
   'CPP_ID','CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS', 'CPP_COMMENT', 'CPP_POUND','CPP_DPOUND'
)

# Single characters that PLY returns as tokens whose type is the character itself.
literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""
22
23
# Whitespace rule.  The docstring IS the token regex (PLY convention), so it
# must stay the first statement and must not be turned into prose.
def t_CPP_WS(t):
    r'\s+'
    # Keep the lexer's line counter in step with any newlines consumed here.
    t.lexer.lineno += t.value.count("\n")
    return t
28
# Stringizing (#) and token-pasting (##) operators
t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'
34
35
# Integer literal rule.  The docstring IS the token regex (PLY convention):
# hexadecimal or decimal digits with an optional u/l suffix.
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU]|[lL]|[uU][lL]|[lL][uU])?)'
    return t

# Register the function under the t_<TOKEN> name PLY looks for.
t_CPP_INTEGER = CPP_INTEGER

# Floating-point literal.  The blanks around '|' are only harmless when the
# pattern is compiled in verbose mode.
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
44
45
# String literal rule.  The docstring IS the token regex (PLY convention):
# a double-quoted run in which a backslash escapes the next character.
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    # An escaped newline inside the literal still advances the line counter.
    t.lexer.lineno += t.value.count("\n")
    return t
50
51
# Character constant rule.  The docstring IS the token regex (PLY
# convention): an optional L prefix followed by a single-quoted run in which
# a backslash escapes the next character.
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t
56
57
62
# Lexing error handler: instead of failing, hand the offending character
# back as a one-character token whose type is the character itself, then
# step the lexer past it.
def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t
68
import copy
import os.path
import re
import sys
import time
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Matches any of the nine '??x' trigraph sequences.
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')

# Maps the third character of a trigraph to the character it stands for.
_trigraph_rep = {
    '=':'#',
    '/':'\\',
    "'":'^',
    '(':'[',
    ')':']',
    '!':'|',
    '<':'{',
    '>':'}',
    '-':'~'
}
103
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class Macro(object):
    """Record describing one preprocessor macro.

    Attributes set by the constructor:
        name     -- the macro name
        value    -- the replacement, as passed in by the caller
        arglist  -- list of parameter names, or None when none was given
        variadic -- true when the last parameter collects extra arguments
        vararg   -- name of that last parameter (only set when variadic)
        source   -- initialised to None
    """

    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            # The variadic parameter is always the final entry of arglist.
            self.vararg = arglist[-1]
        self.source = None
132
133
134
135
136
137
138
139
class Preprocessor(object):
    """Token-level C preprocessor driven by a PLY-style lexer.

    The lexer only has to provide input(), token() and clone(); the token
    type names it uses are discovered at construction time by lexprobe().
    Typical use: parse(text, filename) followed by repeated token() calls.
    """

    def __init__(self, lexer=None):
        """Bind to ``lexer`` (or the last lexer PLY built) and predefine
        the __DATE__ and __TIME__ macros."""
        if lexer is None:
            # Imported here because the module-level name ``lex`` only
            # exists when this file is run as a script.
            import ply.lex as lex
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = {}
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y", tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S", tm))
        self.parser = None

    def tokenize(self, text):
        """Run ``text`` through the lexer and return all tokens as a list."""
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok:
                break
            tokens.append(tok)
        return tokens

    def error(self, file, line, msg):
        """Write a ``file:line message`` diagnostic to standard error."""
        sys.stderr.write("%s:%d %s\n" % (file, line, msg))

    def lexprobe(self):
        """Feed sample inputs to the lexer to learn its token type names.

        Sets t_ID, t_INTEGER, t_INTEGER_TYPE, t_STRING, t_SPACE, t_NEWLINE
        and t_WS; prints a message for each probe that fails.
        """
        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for string literals
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace, if it is returned at all
        self.lexer.input("  ")
        tok = self.lexer.token()
        if not tok or tok.value != "  ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check that the lexer can produce the punctuation the preprocessor needs
        chars = ['<', '>', '#', '##', '\\', '(', ')', ',', '.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)

    # NOTE(review): the def line of this method is absent from this copy of
    # the file and nothing visible calls it; the name add_path is restored
    # from the upstream PLY module -- confirm.
    def add_path(self, path):
        """Append ``path`` to the list of directories searched by include()."""
        self.path.append(path)

    def group_lines(self, input):
        """Generate lists of tokens, one list per logical line of ``input``.

        Trailing whitespace is stripped from every physical line, and a line
        ending in a backslash is joined with the lines that follow it; the
        absorbed lines are left empty so the line count is unchanged.
        """
        line_lexer = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in range(len(lines)):
            j = i + 1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1] + lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        line_lexer.input(input)
        line_lexer.lineno = 1

        current_line = []
        while True:
            tok = line_lexer.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line

    def tokenstrip(self, tokens):
        """Remove leading and trailing whitespace tokens in place and
        return the same list."""
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens) - 1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens

    def collect_args(self, tokenlist):
        """Split a parenthesised macro argument list into its arguments.

        ``tokenlist`` must start (after optional whitespace) at the opening
        parenthesis.  Returns ``(tokencount, args, positions)``: the number
        of tokens consumed through the closing parenthesis, the list of
        whitespace-stripped argument token lists, and the index in
        ``tokenlist`` at which each argument starts (plus the index of the
        closing parenthesis when the last argument is non-empty).  On a
        missing parenthesis an error is reported and ``(0, [], [])`` is
        returned.
        """
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source, tokenlist[0].lineno, "Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1, args, positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                # Only a top-level comma separates arguments.
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Ran out of tokens before the closing ')'.
        self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments")
        return 0, [], []

    def macro_prescan(self, macro):
        """Precompute where arguments must be substituted in a macro body.

        Stores three lists on ``macro``:
            patch           -- (kind, argnum, index) tuples, kind 'c' for an
                               argument next to '##' (inserted unexpanded)
                               and 'e' for one that is macro-expanded first;
                               sorted by descending index
            str_patch       -- (argnum, index) pairs for '#arg' stringizing
            var_comma_patch -- indices of a ',' that precedes
                               '## <variadic parameter>'
        The '#' tokens and any '##' that precedes an argument are deleted
        from ``macro.value`` as they are found.
        """
        macro.patch = []
        macro.str_patch = []
        macro.var_comma_patch = []
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum, i-1))
                    # The deletion shifted the next token into slot i.
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c', argnum, i-1))
                    del macro.value[i-1]
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c', argnum, i))
                    i += 1
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e', argnum, i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2], reverse=True)

    def macro_expand_args(self, macro, args):
        """Return a copy of the macro body with ``args`` substituted.

        ``macro`` must have been processed by macro_prescan(); ``args`` is a
        list of token lists, one per parameter.
        """
        # Work on copies so the stored macro body is never modified.
        rep = [copy.copy(_x) for _x in macro.value]

        # String expansion patches.  These do not change the length of rep.
        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\", "\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Variadic comma patch: with an empty variadic argument the comma in
        # front of it is dropped.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Remaining patches.  macro.patch is sorted by descending index, so
        # each splice leaves the indices still to be patched untouched.
        expanded = {}
        for ptype, argnum, i in macro.patch:
            if ptype == 'c':
                # Concatenation: the argument is inserted unexpanded.
                rep[i:i+1] = args[argnum]
            elif ptype == 'e':
                # Normal expansion: the argument is macro-expanded first.
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of the removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep

    def expand_macros(self, tokens, expanded=None):
        """Expand every macro invocation in ``tokens``, in place.

        ``expanded`` holds the names currently being expanded so that a
        macro is not expanded recursively inside itself.  ``__LINE__`` is
        replaced by the line number of its token.  Returns ``tokens``.
        """
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Found a macro name
                    expanded[t.value] = True

                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value], expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if j < len(tokens) and tokens[j].value == '(':
                            tokcount, args, positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source, t.lineno, "Macro %s requires %d arguments" % (t.value, len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        # Everything from the start of the
                                        # variadic slot up to the closing
                                        # ')' becomes one argument.
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m, args)
                                rep = self.expand_macros(rep, expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                        else:
                            # The name is not followed by '(' so this is not
                            # an invocation; leave the identifier alone and
                            # move on (otherwise the loop never advances).
                            i += 1
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens

    def evalexpr(self, tokens):
        """Evaluate the token list of an #if/#elif expression.

        ``defined NAME`` / ``defined(NAME)`` are resolved first, then macros
        are expanded, remaining identifiers become 0, integer suffixes are
        stripped and the text is evaluated as a Python expression.  Returns
        the value, or 0 (after reporting an error) when evaluation fails.
        """
        # Resolve defined(...) before macro expansion touches the names.
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1"
                        else:
                            result = "0"
                        if not needparen:
                            break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source, tokens[i].lineno, "Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i, t in enumerate(tokens):
            if t.type == self.t_ID:
                # Any identifier left after expansion counts as 0.  A plain
                # "0" is used because the "0L" spelling is not a valid
                # literal on Python 3.
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&", " and ")
        expr = expr.replace("||", " or ")
        expr = expr.replace("!", " not ")
        # The blanket '!' rewrite above also hits '!='; put that back.
        expr = expr.replace(" not =", " !=")
        try:
            # NOTE(review): eval() executes text that comes from the file
            # being preprocessed; do not feed this untrusted input.
            result = eval(expr)
        except Exception:
            if tokens:
                lineno = tokens[0].lineno
            else:
                lineno = 0
            self.error(self.source, lineno, "Couldn't evaluate expression")
            result = 0
        return result

    def parsegen(self, input, source=None):
        """Generate the preprocessed token stream for ``input``.

        ``source`` is the name recorded in __FILE__ and used in
        diagnostics.  Handles #define, #include, #undef, #ifdef, #ifndef,
        #if, #elif, #else and #endif; other directives are ignored.
        """
        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True       # are we currently emitting text?
        iftrigger = False   # has a branch of the current #if chain been taken?
        ifstack = []        # saved (enable, iftrigger) of enclosing conditionals

        for x in lines:
            # Find the first non-whitespace token of the line.
            for i, tok in enumerate(x):
                if tok.type not in self.t_WS:
                    break
            if tok.value == '#':
                # Preprocessor directive

                # Keep the newlines of the directive line so that later
                # tokens stay on their original lines.
                for ws in x:
                    if ws.type in self.t_WS and '\n' in ws.value:
                        chunk.append(ws)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        # Flush pending text first: it must be expanded with
                        # the macros in force before this directive.
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:      # only if the enclosing block is active
                            if enable:          # a branch was just emitted: stop
                                enable = False
                            elif not iftrigger:  # nothing taken yet: test this branch
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #elif")

                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #else")

                elif name == 'endif':
                    if ifstack:
                        enable, iftrigger = ifstack.pop()
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    def include(self, tokens):
        """Generate the tokens of the file named by an #include directive.

        ``tokens`` are the directive's arguments: a string literal, a
        ``<...>`` sequence, or something that macro-expands to one of
        those.  Prints a message and produces nothing when the directive
        is malformed or the file cannot be found.
        """
        if not tokens:
            return
        if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
            tokens = self.expand_macros(tokens)
            if not tokens:
                # The argument expanded to nothing.
                print("Malformed #include statement")
                return

        if tokens[0].value == '<':
            # Include <...>
            i = 1
            while i < len(tokens):
                if tokens[i].value == '>':
                    break
                i += 1
            else:
                print("Malformed #include <...>")
                return
            filename = "".join([x.value for x in tokens[1:i]])
            path = self.path + [""] + self.temp_path
        elif tokens[0].type == self.t_STRING:
            filename = tokens[0].value[1:-1]
            path = self.temp_path + [""] + self.path
        else:
            print("Malformed #include statement")
            return

        for p in path:
            iname = os.path.join(p, filename)
            # Only opening/reading is guarded: an IOError raised while the
            # included tokens are being consumed must not be mistaken for
            # "file not found" and trigger a second include.
            try:
                f = open(iname, "r")
                try:
                    data = f.read()
                finally:
                    f.close()
            except IOError:
                continue
            dname = os.path.dirname(iname)
            if dname:
                self.temp_path.insert(0, dname)
            for tok in self.parsegen(data, filename):
                yield tok
            if dname:
                del self.temp_path[0]
            break
        else:
            print("Couldn't find '%s'" % filename)

    def define(self, tokens):
        """Define a macro.

        ``tokens`` is either the text that would follow ``#define`` or the
        equivalent token list.  Problems are reported by printing a message;
        in that case nothing is stored.
        """
        try:
            string_types = (str, unicode)
        except NameError:
            # Python 3: there is no separate unicode type.
            string_types = (str,)
        if isinstance(tokens, string_types):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                # Name only: a macro with an empty body.
                m = Macro(name.value, [])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value, self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If the lexer made the dots part of the identifier,
                        # strip them for the purposes of macro expansion.
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    # Every parameter was acceptable.
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    # Drop whitespace on either side of '##'.
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value, mvalue, [x[0].value for x in args], variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")

    def undef(self, tokens):
        """Remove the macro named by the first token; unknown names and an
        empty token list are ignored."""
        if not tokens:
            return
        self.macros.pop(tokens[0].value, None)

    def parse(self, input, source=None, ignore=None):
        """Start preprocessing ``input``; fetch the results with token().

        ``ignore`` is a container of token types that token() should skip.
        """
        if ignore is None:
            ignore = {}
        self.ignore = ignore
        self.parser = self.parsegen(input, source)

    def token(self):
        """Return the next token whose type is not ignored, or None once
        the input is exhausted (or when no parse is in progress)."""
        if self.parser is None:
            return None
        for tok in self.parser:
            if tok.type not in self.ignore:
                return tok
        self.parser = None
        return None
872
if __name__ == '__main__':
    # Build a lexer from the token rules defined in this module.
    import ply.lex as lex
    lexer = lex.lex()

    # Preprocess the file named on the command line and print every
    # resulting token together with the current source name.
    import sys
    f = open(sys.argv[1])
    try:
        text = f.read()
    finally:
        # Close the input file even if reading fails.
        f.close()

    p = Preprocessor(lexer)
    p.parse(text, sys.argv[1])
    while True:
        tok = p.token()
        if not tok:
            break
        print("%s %s" % (p.source, tok))
888