1 """
2 A parser for the rule configuration format.
3
4 see doc/rule_grammar.rst for documentation.
5
6 """
7 import datetime
8 import logging
9 import os
10 import re
11 import sys
12
13
14 PARSEDEBUG=os.environ.get('PARSEDEBUG', False)
15
16 from pkg_resources import resource_filename
17 from ewa.ply import lex, yacc
18
19 from ewa.logutil import logger, critical, error, debug
20 import ewa.rules as rules
21
22
25
27
# Lexer error hook: abort with a ParseError carrying the lexer's current
# line number and the value of the token that could not be matched.
def t_error(self, t):
    message = 'lexical error, line %d: %s' % (t.lexer.lineno, t.value)
    raise ParseError(message)
31
32
# Lexer rule for a run of newlines: advance the lexer's line counter by
# the number of characters matched. Nothing is returned, so the match
# produces no token.
def t_newline(self, t):
    r'\n+'
    newline_count = len(t.value)
    t.lexer.lineno = t.lexer.lineno + newline_count
36
# Lexer rule for a quoted regex literal: regex:'...' or regex:"...".
# The token value becomes the text between the quotes, with
# backslash-escaped occurrences of the quote character unescaped.
def t_QREGEX(self, t):
    r'regex:(?:\'(?:[^\']|\\\')*\'|"(?:[^"]|\\")*")'
    prefix_len = len('regex:')
    # The quote character sits directly after the prefix (index 6).
    # Reading index 7 instead picks up the first character of the pattern,
    # which stripped backslashes in front of that character
    # (e.g. regex:'d\d+' yielded "dd+").
    quote = t.value[prefix_len]
    body = t.value[prefix_len + 1:-1]
    t.value = body.replace('\\' + quote, quote)
    return t
42
# Lexer rule for an unquoted regex literal: "regex:" followed by characters
# other than space, comma and colon. The token value is the text after the
# prefix, with backslash-escaped space, comma and colon unescaped.
def t_BAREREGEX(self, t):
    r'regex:[^ ,:]+'
    pattern = t.value[len('regex:'):]
    # Spell the backslash explicitly: '\ ', '\,' and '\:' are invalid
    # escape sequences in a non-raw string literal.
    for delimiter in (' ', ',', ':'):
        pattern = pattern.replace('\\' + delimiter, delimiter)
    t.value = pattern
    return t
50
# Lexer rule for a date written MM<sep>DD<sep>YYYY, where <sep> is "-",
# "/" or "." and must be the same in both positions. The token value is
# replaced by a datetime.date; an impossible date (e.g. 02/31) is logged
# through critical() and the ValueError is re-raised.
def t_DATE(self, t):
    r'(?P<month>0[1-9]|1[012])(?P<sep>-|/|\.)(?P<day>0[1-9]|[12]\d|3[01])(?P=sep)(?P<year>\d{4})'
    groups = t.lexer.lexmatch.groupdict()
    parts = {}
    for field in ('month', 'day', 'year'):
        parts[field] = int(groups[field])
    try:
        t.value = datetime.date(**parts)
    except ValueError:
        critical("invalid date: %s", t.value)
        raise
    return t
61
# Lexer rule for a date followed by a 24-hour time: MM<sep>DD<sep>YYYY,
# then a space or the same separator, then HHMM. The token value is
# replaced by a datetime.datetime with seconds and microseconds at zero;
# an impossible date is logged through critical() and the ValueError is
# re-raised.
def t_DATETIME(self, t):
    r'(?P<month>0[1-9]|1[012])(?P<sep>-|/|\.)(?P<day>0[1-9]|[12]\d|3[01])(?P=sep)(?P<year>\d{4})(?: |(?P=sep))(?P<hour>[01][0-9]|2[0-3])(?P<minute>[0-5][0-9])'
    # The minute group is [0-5][0-9]; the earlier [0-5]{2} rejected valid
    # minutes whose second digit is 6-9 (e.g. 09, 17, 58).
    groups = t.lexer.lexmatch.groupdict()
    fields = ('month', 'day', 'year', 'hour', 'minute')
    # Every group is mandatory in the pattern, so int() always gets a
    # string; no None fallback is needed.
    parts = dict((name, int(groups[name])) for name in fields)
    parts['microsecond'] = 0
    try:
        t.value = datetime.datetime(**parts)
    except ValueError:
        critical("invalid datetime: %s", t.value)
        raise
    return t
77
# Lexer rule for an unquoted word (word characters plus / * \ ? _ .).
# When the text is one of self.reserved, the token type is switched to
# the upper-cased keyword; otherwise the token is returned unchanged.
def t_BAREWORD(self, t):
    r'[\w\d/\*\\\?_\.]+'
    word = t.value
    if word in self.reserved:
        t.type = word.upper()
    return t
84
# Lexer rule for a single- or double-quoted word. The token value becomes
# the text between the quotes, with backslash-escaped occurrences of the
# enclosing quote character unescaped.
def t_QWORD(self, t):
    r'\'(?:[^\']|\\\')*\'|"(?:[^"]|\\")*"'
    raw = t.value
    quote, inner = raw[0], raw[1:-1]
    t.value = inner.replace('\\' + quote, quote)
    return t
90
91
92 reserved=('default',
93 'pre',
94 'post',
95 'and',
96 'or',
97 'not')
98
102
103 tokens=('OP',
104 'BAREWORD',
105 'QWORD',
106 'BAREREGEX',
107 'QREGEX',
108 'DATE',
109 'DATETIME',
110 'DEFAULT',
111 'PRE',
112 'POST',
113 'AND',
114 'OR',
115 'NOT',
116 'LPAREN',
117 'RPAREN',
118 'LBRACK',
119 'RBRACK',
120 'COLON',
121 'COMMA',
122 'DASH')
123
124 t_LPAREN=r'\('
125
126 t_RPAREN=r'\)'
127
128 t_LBRACK=r'\['
129
130 t_RBRACK=r'\]'
131
132 t_COLON=':'
133
134 t_COMMA=','
135
136 t_DASH='-'
137
138 t_OP='<=|>=|<|>|='
139
140 t_ignore=' \t'
141
143 if not (p is None):
144 raise ParseError("Syntax error at line %d: %s" % (p.lineno, p))
145 else:
146 raise ParseError("Syntax error: unexpected EOF")
147
151
153 '''cond_rule_list : cond_rule
154 | cond_rule COMMA cond_rule_list
155 | cond_rule cond_rule_list'''
156 plen=len(p)
157 if plen==2:
158 p[0]=[p[1]]
159 elif plen==3:
160 p[0]=[p[1]]+p[2]
161 elif plen==4:
162 p[0]=[p[1]]+p[3]
163
165 'rulelist : LBRACK cond_rule_list RBRACK'
166 p[0]=p[2]
167
169 'rulelist : LBRACK RBRACK'
170 p[0]=[]
171
173 '''cond_rule : cond COLON rule
174 | rule'''
175 if len(p)==2:
176 p[0]=self._gen_rule(None, p[1])
177 else:
178 p[0]=self._gen_rule(p[1], p[3])
179
181 if isinstance(data, dict) and 'pre' in data:
182 return rules.MatchRule(cond, data['pre'], data['post'])
183 elif isinstance(data, list):
184
185 return rules.RuleList(data, cond=cond)
186 else:
187 assert 0, 'not reached'
188
190 '''rule : simplerule
191 | rulelist'''
192 p[0]=p[1]
193
195 'simplerule : prelist COMMA postlist'
196 p[0]=dict(pre=p[1], post=p[3])
197
199 'simplerule : prelist postlist'
200 p[0]=dict(pre=p[1], post=p[2])
201
203 'simplerule : postlist COMMA prelist'
204 p[0]=dict(pre=p[3], post=p[1])
205
207 'simplerule : postlist prelist'
208 p[0]=dict(pre=p[2], post=p[1])
209
211 'simplerule : DEFAULT'
212 p[0]=dict(pre=[], post=[])
213
215 'prelist : PRE COLON speclist'
216 p[0]=p[3]
217
# Grammar action: a postlist is the POST keyword, a colon and a speclist;
# its value is the speclist's value. (The stray "-" that preceded "def"
# was a leftover diff marker and made the line unparseable.)
def p_postlist(self, p):
    'postlist : POST COLON speclist'
    p[0] = p[3]
221
223 'speclist : LBRACK specifier_list RBRACK'
224 p[0]=p[2]
225
227 'speclist : LBRACK RBRACK'
228 p[0]=[]
229
231 '''specifier_list : specifier
232 | specifier COMMA specifier_list'''
233 if len(p)==2:
234 p[0]=[p[1]]
235 else:
236 p[0]=[p[1]]+p[3]
237
239 'specifier : string'
240 p[0]=p[1]
241
243 '''string : BAREWORD
244 | QWORD'''
245 p[0]=p[1]
246
248 '''cond : cond_expr
249 | simple_cond'''
250 p[0]=p[1]
251
253 'cond_expr : cond_op LPAREN cond_list RPAREN'
254 p[0]=p[1](*p[3])
255
257 'cond_expr : NOT LPAREN cond RPAREN'
258 p[0]=rules.Not(p[3])
259
261 '''cond_list : cond
262 | cond COMMA cond_list'''
263 if len(p)==2:
264 p[0]=[p[1]]
265 else:
266 p[0]=[p[1]]+p[3]
267
269 '''cond_op : AND
270 | OR'''
271
272 t=p[1]
273 if t=='and':
274 p[0]=rules.And
275 elif t=='or':
276 p[0]=rules.Or
277
278
280 '''simple_cond : regex
281 | glob
282 | datespec'''
283 p[0]=p[1]
284
289
# Grammar action: build a rules.RegexMatcher from a regex token and its
# options. Only positional options I, U and L are accepted (mapped to the
# matching re flags); keyed options or unknown flags raise ParseError.
def p_regex(self, p):
    '''regex : BAREREGEX condopts
             | QREGEX condopts'''
    flag_names, keyed = p[2]
    if keyed:
        raise ParseError("illegal options for regex: %s" % p)
    validopts = dict(I=re.I, U=re.U, L=re.L)
    unknown = set(flag_names).difference(validopts)
    if unknown:
        raise ParseError("illegal options for regex: %s" % ','.join(list(unknown)))
    # Fold the flags starting from 0 so that an empty option list yields
    # "no flags" instead of failing on an empty reduction.
    flags = 0
    for name in flag_names:
        flags |= validopts[name]
    p[0] = rules.RegexMatcher(p[1], flags)
304
308
313
# Translate a date format written with the codes YYYY, YY, MM, DD, HH, mm,
# hh and PM into a pair (regex, strftime_format).
#
# Each code contributes its strftime directive to the format and a pattern
# matching its text to the regex. Any other character is a literal: it is
# regex-escaped in the pattern and "%" is doubled in the format, so
# characters such as ".", "(" or "%" stand for themselves.
# An empty (or None) fmt yields ('', '').
def _expand_datefmt(self, fmt):
    codes = dict(YYYY=('%Y', r'\d{4}'),
                 YY=('%y', r'\d\d'),
                 MM=('%m', r'\d\d'),
                 DD=('%d', r'\d\d'),
                 HH=('%H', r'\d\d'),
                 mm=('%M', r'\d\d'),
                 PM=('%p', '(?:AM|PM)'),
                 hh=('%I', r'\d\d'))
    # Longest code first so 'YYYY' is not consumed as two 'YY's.
    ordered = sorted(codes, key=len, reverse=True)
    regex = []
    newfmt = []
    rest = fmt
    while rest:
        for code in ordered:
            if rest.startswith(code):
                directive, pattern = codes[code]
                step = len(code)
                break
        else:
            # Not a known code: take one literal character.
            literal = rest[0]
            directive = literal.replace('%', '%%')
            pattern = re.escape(literal)
            step = 1
        newfmt.append(directive)
        regex.append(pattern)
        rest = rest[step:]
    return (''.join(regex),
            ''.join(newfmt))
345
# Build the matcher for a date range condition.
#
# start, end: bounds of the range, passed through to the matcher.
# opts:       (positional_options, keyed_options) pair.
# lineno:     line number used in error messages.
#
# At most one positional option is accepted, either 'F' or 'T'. With 'F'
# a rules.FileTimeMatch is built from the keyed 'fmt' option (the only
# keyed option allowed there); otherwise a rules.CurrentTimeMatch is
# returned. Raises ParseError for unsupported, duplicate or incompatible
# options, and for 'F' given without 'fmt'.
def _gen_datematcher(self, start, end, opts, lineno):
    posopts, keyedopts = opts
    if posopts:
        supported_pos = ['F', 'T']
        spos = set(posopts)
        diff = spos.difference(supported_pos)
        if diff:
            # The operator here was mistyped as "%s", which raised
            # NameError instead of reporting the bad option.
            raise ParseError(
                'unsupported condition options on line %d: %s'
                % (lineno, ', '.join(list(diff))))
        lpos = len(posopts)
        if lpos > len(spos):
            raise ParseError(
                'duplicate option on line %s: %s'
                % (lineno, ', '.join(posopts)))
        if lpos >= 2:
            raise ParseError(
                'incompatible options on line %d: %s'
                % (lineno, ', '.join(posopts)))

    if not (posopts and posopts[0] == 'F'):
        return rules.CurrentTimeMatch(start, end)

    supported_keys = ['fmt']
    diff = set(keyedopts).difference(supported_keys)
    if diff:
        raise ParseError(
            'unsupported options on line %d: %s'
            % (lineno, ', '.join(list(diff))))
    if not keyedopts:
        # Without a format there is nothing to build the file-time
        # matcher from; report it instead of failing on unbound names.
        raise ParseError(
            "option 'F' requires a 'fmt' option on line %d" % lineno)
    regex, newfmt = self._expand_datefmt(keyedopts['fmt'])
    return rules.FileTimeMatch(start, end, regex, newfmt)
379
# Grammar action: a date range "start - end [options]". Both ends are
# normalised with _to_datetime and handed, together with the parsed
# options, to self._gen_datematcher.
def p_datespec(self, p):
    '''datespec : datetime DASH datetime condopts
                | date DASH date condopts
                | datetime DASH date condopts
                | date DASH datetime condopts'''
    start = _to_datetime(p[1])
    end = _to_datetime(p[3])
    # p.lineno is a method on PLY's production object and must be called
    # with a symbol index; symbol 2 is the DASH token.
    p[0] = self._gen_datematcher(start, end, p[4], p.lineno(2))
396
402
408
# Grammar action: split a bracketed option list into positional options
# (plain strings, kept in order) and keyed options ((key, value) tuples,
# collected into a dict). The result is the pair (positional, keyed).
# Raises ParseError when a key is given more than once.
def p_condopts(self, p):
    '''condopts : LBRACK condopt_list RBRACK'''
    pos = []
    keyed = {}
    for opt in p[2]:
        if isinstance(opt, tuple):
            k, v = opt
            if k in keyed:
                # p.lineno is a method in PLY; symbol 1 is the LBRACK
                # token that opens the option list.
                raise ParseError(
                    "duplicate option on line %d: %s" % (p.lineno(1), k))
            keyed[k] = v
        else:
            pos.append(opt)
    p[0] = (pos, keyed)
423
425 '''condopt_list : condopt
426 | condopt COMMA condopt_list'''
427 if len(p)==2:
428 p[0]=[p[1]]
429 else:
430 p[0]=[p[1]]+p[3]
431
433 '''condopt : BAREWORD OP BAREWORD'''
434 op=p[2]
435 if op!='=':
436 raise ParseError(
437 "expected '=' in condition options on line %d, got %s" \
438 % (p.lineno, op))
439 p[0]=(p[1], p[3])
440
441
443 '''condopt : BAREWORD'''
444 p[0]=p[1]
445
447 'datecompare : OP'
448 p[0]=p[1]
449
451 'date : DATE'
452 p[0]=p[1]
453
455 'datetime : DATETIME'
456 p[0]=p[1]
457
459
460 if op == '=':
461 return (_to_datetime(date),
462 datetime.datetime(date.year,
463 date.month,
464 date.day,
465 23,
466 59,
467 59))
468
469 elif op == '<=':
470 return (datetime.datetime.min,
471 _to_datetime(date))
472
473 elif op == '>=':
474 return (_to_datetime(date),
475 datetime.datetime.max)
476 elif op == '<':
477 return (datetime.datetime.min,
478 _to_datetime(date)-datetime.timedelta(microseconds=1))
479 elif op == '>':
480 return (_to_datetime(date)+datetime.timedelta(microseconds=1),
481 datetime.datetime.max)
482 else:
483 assert 0, "not reached"
484
# Normalise a value to datetime.datetime: a plain datetime.date becomes
# midnight of that day; anything else (including a datetime.datetime) is
# returned unchanged.
def _to_datetime(dt):
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # ruled out first; otherwise the time of day would be discarded.
    if isinstance(dt, datetime.datetime):
        return dt
    if isinstance(dt, datetime.date):
        return datetime.datetime(dt.year,
                                 dt.month,
                                 dt.day,
                                 0,
                                 0,
                                 0)
    return dt
494
496 if debug:
497 logger.setLevel(logging.DEBUG)
498 logging.basicConfig(stream=sys.stderr)
499 p=RuleParser()
500 lexer=lex.lex(module=p)
501 if PARSEDEBUG:
502 yaccopts=dict(optimize=False,
503 debug=True,
504 tabmodule='_ruletab',
505
506 outputdir=resource_filename('ewa.__init__', ''))
507 else:
508 yaccopts=dict(write_tables=False,
509 debug=False,
510 tabmodule='ewa._ruletab')
511 parser=yacc.yacc(module=p,
512 **yaccopts)
513 return parser.parse(s, lexer=lexer, debug=debug)
514
519
# Tokenize a rule file with lex_string.
#
# fp:    an object with a read() method, or a path to open.
# debug: passed through to lex_string.
#
# A file opened here from a path is closed again once it has been read;
# a file object supplied by the caller is left open.
def lex_file(fp, debug=False):
    if hasattr(fp, 'read'):
        return lex_string(fp.read(), debug)
    stream = open(fp)
    try:
        text = stream.read()
    finally:
        stream.close()
    return lex_string(text, debug)
524
# Tokenize the string s with a lexer built from RuleParser and print every
# token, one per line. With debug set, the logger is switched to DEBUG and
# logging is directed to stderr first.
def lex_string(s, debug=False):
    if debug:
        logger.setLevel(logging.DEBUG)
        logging.basicConfig(stream=sys.stderr)
    p = RuleParser()
    lexer = lex.lex(module=p)
    lexer.input(s)
    while True:
        tok = lexer.token()
        if not tok:
            break
        # Single-argument call form: valid as both the Python 2 print
        # statement and the Python 3 print function.
        print(tok)
537
538
# Command-line entry point: "-lex FILE" prints the token stream,
# "-parse FILE" prints the parsed rules as JSON; anything else prints a
# usage message to stderr and exits with status 1.
if __name__ == '__main__':
    args = sys.argv[1:]
    if len(args) == 2:
        mode, path = args
        if mode == '-lex':
            lex_file(path, True)
            sys.exit(0)
        elif mode == '-parse':
            thing = parse_file(path, True)
            # Call form works as a Python 2 statement and a Python 3
            # function alike.
            print(rules.to_json(thing))
            sys.exit(0)

    prog = os.path.basename(sys.argv[0])
    sys.stderr.write('usage: %s [-lex | -parse] file\n' % prog)
    sys.exit(1)
553