Package ewa :: Module ruleparser
[hide private]
[frames | no frames]

Source Code for Module ewa.ruleparser

  1  """ 
  2  A parser for the rule configuration format. 
  3   
  4  see doc/rule_grammar.rst for documentation. 
  5   
  6  """ 
  7  import datetime 
  8  import logging 
  9  import os 
 10  import re 
 11  import sys 
 12   
 13  # developer feature 
 14  PARSEDEBUG=os.environ.get('PARSEDEBUG', False) 
 15   
 16  from pkg_resources import resource_filename     
 17  from ewa.ply import lex, yacc 
 18   
 19  from ewa.logutil import logger, critical, error, debug 
 20  import ewa.rules as rules 
 21   
 22   
class ParseError(RuntimeError):
    """Raised for lexical, syntactic, or option errors in a rule file."""
25
class RuleParser(object):
    """PLY lexer and parser specification for the rule configuration format.

    An instance is handed to ``lex.lex(module=...)`` and
    ``yacc.yacc(module=...)``.  Methods and attributes named ``t_*`` are
    token rules and methods named ``p_*`` are grammar productions.  For
    those methods the docstring IS the regular expression or the grammar
    rule, so explanatory text lives in ``#`` comments instead.

    Token rules written as functions are tried in the order in which they
    are defined, and PLY compiles the patterns with ``re.VERBOSE``; both
    facts matter for the ordering and escaping used below.
    """

    # ------------------------------------------------------------------
    # Lexer
    # ------------------------------------------------------------------

    # Barewords that are promoted to their own (upper-cased) token type.
    reserved = ('default',
                'pre',
                'post',
                'and',
                'or',
                'not')

    tokens = ('OP',
              'BAREWORD',
              'QWORD',
              'BAREREGEX',
              'QREGEX',
              'DATE',
              'DATETIME',
              'DEFAULT',
              'PRE',
              'POST',
              'AND',
              'OR',
              'NOT',
              'LPAREN',
              'RPAREN',
              'LBRACK',
              'RBRACK',
              'COLON',
              'COMMA',
              'DASH')

    t_LPAREN = r'\('

    t_RPAREN = r'\)'

    t_LBRACK = r'\['

    t_RBRACK = r'\]'

    t_COLON = ':'

    t_COMMA = ','

    t_DASH = '-'

    t_OP = '<=|>=|<|>|='

    t_ignore = ' \t'

    def t_error(self, t):
        # Called by the lexer when no token rule matches.
        raise ParseError('lexical error, line %d: %s'
                         % (t.lexer.lineno, t.value))

    # Define a rule so we can track line numbers
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Quoted regex: regex:'...' or regex:"...".  The escaped-quote branch
    # is tried first; otherwise the backslash is consumed on its own and
    # the escaped quote terminates the string early.
    def t_QREGEX(self, t):
        r'regex:(?:\'(?:\\\'|[^\'])*\'|"(?:\\"|[^"])*")'
        # "regex:" is six characters long, so the opening quote is at
        # index 6 and the content runs from index 7 to the closing quote.
        quote = t.value[6]
        t.value = t.value[7:-1].replace('\\' + quote, quote)
        return t

    # Unquoted regex: runs up to the first unescaped space, comma or
    # colon.  A backslash-escaped separator is part of the token and is
    # unescaped in the value.
    def t_BAREREGEX(self, t):
        r'regex:(?:\\[ ,:]|[^ ,:])+'
        t.value = t.value[6:].replace(
            r'\ ', ' ').replace(
            r'\,', ',').replace(
            r'\:', ':')
        return t

    # DATETIME must be defined before DATE: every datetime starts with a
    # valid date, and function rules are tried in definition order, so
    # with DATE first this rule could never match.  The separator space
    # is written as "\ " because a bare space is ignored under re.VERBOSE.
    def t_DATETIME(self, t):
        r'(?P<month>0[1-9]|1[012])(?P<sep>-|/|\.)(?P<day>0[1-9]|[12]\d|3[01])(?P=sep)(?P<year>\d{4})(?:\ |(?P=sep))(?P<hour>[01][0-9]|2[0-3])(?P<minute>[0-5][0-9])'
        d = t.lexer.lexmatch.groupdict()
        kw = dict((k, int(d[k]))
                  for k in ('month', 'day', 'year', 'hour', 'minute'))
        # normalize to 0
        kw['microsecond'] = 0
        try:
            t.value = datetime.datetime(**kw)
        except ValueError:
            # e.g. day 31 in a 30-day month: log, then let it propagate
            critical("invalid datetime: %s", t.value)
            raise
        return t

    def t_DATE(self, t):
        r'(?P<month>0[1-9]|1[012])(?P<sep>-|/|\.)(?P<day>0[1-9]|[12]\d|3[01])(?P=sep)(?P<year>\d{4})'
        d = t.lexer.lexmatch.groupdict()
        kw = dict((k, int(d[k])) for k in ('month', 'day', 'year'))
        try:
            t.value = datetime.date(**kw)
        except ValueError:
            critical("invalid date: %s", t.value)
            raise
        return t

    def t_BAREWORD(self, t):
        r'[\w\d/\*\\\?_\.]+'
        # Reserved words keep their text as the value but get their own
        # token type.
        if t.value in self.reserved:
            t.type = t.value.upper()
        return t

    # Quoted word; same escaped-quote handling as t_QREGEX.
    def t_QWORD(self, t):
        r'\'(?:\\\'|[^\'])*\'|"(?:\\"|[^"])*"'
        quote = t.value[0]
        t.value = t.value[1:-1].replace('\\' + quote, quote)
        return t

    # Comments run to end of line and produce no token.
    def t_COMMENT(self, t):
        r'\#.*'
        pass

    # ------------------------------------------------------------------
    # Parser
    # ------------------------------------------------------------------

    def p_error(self, p):
        # p is the offending token, or None at end of input.
        if p is not None:
            raise ParseError("Syntax error at line %d: %s" % (p.lineno, p))
        else:
            raise ParseError("Syntax error: unexpected EOF")

    def p_grammar(self, p):
        'grammar : cond_rule_list'
        p[0] = rules.RuleList(p[1])

    def p_cond_rule_list(self, p):
        '''cond_rule_list : cond_rule
                          | cond_rule COMMA cond_rule_list
                          | cond_rule cond_rule_list'''
        plen = len(p)
        if plen == 2:
            p[0] = [p[1]]
        elif plen == 3:
            p[0] = [p[1]] + p[2]
        elif plen == 4:
            p[0] = [p[1]] + p[3]

    def p_rulelist_1(self, p):
        'rulelist : LBRACK cond_rule_list RBRACK'
        p[0] = p[2]

    def p_rulelist_2(self, p):
        'rulelist : LBRACK RBRACK'
        p[0] = []

    def p_cond_rule(self, p):
        '''cond_rule : cond COLON rule
                     | rule'''
        if len(p) == 2:
            p[0] = self._gen_rule(None, p[1])
        else:
            p[0] = self._gen_rule(p[1], p[3])

    def _gen_rule(self, cond, data):
        """Build the rule object for a parsed ``rule``.

        ``data`` is either the pre/post dict produced by the simplerule
        productions or the list produced by the rulelist productions;
        ``cond`` is the parsed condition or None.
        """
        if isinstance(data, dict) and 'pre' in data:
            return rules.MatchRule(cond, data['pre'], data['post'])
        elif isinstance(data, list):
            # a rulelist
            return rules.RuleList(data, cond=cond)
        else:
            # explicit raise so the check survives "python -O"
            raise AssertionError('not reached')

    def p_rule(self, p):
        '''rule : simplerule
                | rulelist'''
        p[0] = p[1]

    def p_simplerule_1(self, p):
        'simplerule : prelist COMMA postlist'
        p[0] = dict(pre=p[1], post=p[3])

    def p_simplerule_2(self, p):
        'simplerule : prelist postlist'
        p[0] = dict(pre=p[1], post=p[2])

    def p_simplerule_3(self, p):
        'simplerule : postlist COMMA prelist'
        p[0] = dict(pre=p[3], post=p[1])

    def p_simplerule_4(self, p):
        'simplerule : postlist prelist'
        p[0] = dict(pre=p[2], post=p[1])

    def p_simplerule_5(self, p):
        'simplerule : DEFAULT'
        p[0] = dict(pre=[], post=[])

    def p_prelist(self, p):
        'prelist : PRE COLON speclist'
        p[0] = p[3]

    def p_postlist(self, p):
        'postlist : POST COLON speclist'
        p[0] = p[3]

    def p_speclist_1(self, p):
        'speclist : LBRACK specifier_list RBRACK'
        p[0] = p[2]

    def p_speclist_2(self, p):
        'speclist : LBRACK RBRACK'
        p[0] = []

    def p_specifier_list(self, p):
        '''specifier_list : specifier
                          | specifier COMMA specifier_list'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = [p[1]] + p[3]

    def p_specifier(self, p):
        'specifier : string'
        p[0] = p[1]

    def p_string(self, p):
        '''string : BAREWORD
                  | QWORD'''
        p[0] = p[1]

    def p_cond(self, p):
        '''cond : cond_expr
                | simple_cond'''
        p[0] = p[1]

    def p_cond_expr_1(self, p):
        'cond_expr : cond_op LPAREN cond_list RPAREN'
        # p[1] is rules.And or rules.Or; the conditions become its
        # positional arguments.
        p[0] = p[1](*p[3])

    def p_cond_expr_2(self, p):
        'cond_expr : NOT LPAREN cond RPAREN'
        p[0] = rules.Not(p[3])

    def p_cond_list(self, p):
        '''cond_list : cond
                     | cond COMMA cond_list'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = [p[1]] + p[3]

    def p_cond_op(self, p):
        '''cond_op : AND
                   | OR'''
        t = p[1]
        if t == 'and':
            p[0] = rules.And
        elif t == 'or':
            p[0] = rules.Or

    def p_simple_cond(self, p):
        '''simple_cond : regex
                       | glob
                       | datespec'''
        p[0] = p[1]

    def p_regex_1(self, p):
        '''regex : BAREREGEX
                 | QREGEX'''
        p[0] = rules.RegexMatcher(p[1])

    def p_regex_2(self, p):
        '''regex : BAREREGEX condopts
                 | QREGEX condopts'''
        # supported flags: I, L, U
        flags, keyed = p[2]
        if keyed:
            # key=value options are not accepted for regexes
            raise ParseError("illegal options for regex: %s"
                             % ', '.join(sorted(keyed)))
        validopts = dict(I=re.I, U=re.U, L=re.L)
        diff = set(flags).difference(validopts)
        if diff:
            raise ParseError("illegal options for regex: %s"
                             % ','.join(sorted(diff)))
        flagbits = 0
        for name in flags:
            flagbits |= validopts[name]
        p[0] = rules.RegexMatcher(p[1], flagbits)

    def p_glob_1(self, p):
        'glob : string'
        p[0] = rules.GlobMatcher(p[1])

    def p_glob_2(self, p):
        'glob : string condopts'
        # TO BE DONE: the options are parsed but currently ignored.
        p[0] = rules.GlobMatcher(p[1])

    def _expand_datefmt(self, fmt):
        """Translate a date format such as ``YYYYMMDD`` into two strings.

        Returns ``(regex, timefmt)``: ``regex`` is a regular expression
        for text in that format and ``timefmt`` is the equivalent
        ``%``-style time format.  The codes YYYY, YY, MM, DD, HH, mm, PM
        and hh are recognized; any other character is taken literally
        (escaped in the regex, and ``%`` doubled in the time format).
        """
        codes = dict(YYYY=('%Y', r'\d{4}'),
                     YY=('%y', r'\d\d'),
                     MM=('%m', r'\d\d'),
                     DD=('%d', r'\d\d'),
                     HH=('%H', r'\d\d'),
                     mm=('%M', r'\d\d'),
                     PM=('%p', '(?:AM|PM)'),
                     hh=('%I', r'\d\d'))
        # Reverse sort puts YYYY ahead of YY, so the longer code wins.
        keys = sorted(codes, reverse=True)
        regex = []
        newfmt = []
        buff = fmt
        while buff:
            for k in keys:
                if buff.startswith(k):
                    timecode, pattern = codes[k]
                    newfmt.append(timecode)
                    regex.append(pattern)
                    buff = buff[len(k):]
                    break
            else:
                ch = buff[0]
                if ch == '%':
                    newfmt.append('%%')
                else:
                    newfmt.append(ch)
                regex.append(re.escape(ch))
                buff = buff[1:]

        return (''.join(regex),
                ''.join(newfmt))

    def _gen_datematcher(self, start, end, opts, lineno):
        """Build the time matcher for a date condition with options.

        ``opts`` is the ``(positional, keyed)`` pair produced by
        p_condopts.  Positional option ``F`` together with ``fmt=...``
        yields a rules.FileTimeMatch; otherwise a rules.CurrentTimeMatch
        is returned.  Raises ParseError for unsupported, duplicate or
        incompatible options, and for ``F`` without ``fmt``.
        """
        posopts, keyedopts = opts
        if posopts:
            supported_pos = ['F', 'T']
            spos = set(posopts)
            diff = spos.difference(supported_pos)
            if diff:
                raise ParseError(
                    'unsupported condition options on line %d: %s'
                    % (lineno, ', '.join(sorted(diff))))
            lpos = len(posopts)
            if lpos > len(spos):
                raise ParseError(
                    'duplicate option on line %s: %s'
                    % (lineno, ', '.join(posopts)))
            if lpos >= 2:
                raise ParseError(
                    'incompatible options on line %d: %s'
                    % (lineno, ', '.join(posopts)))
        if posopts and posopts[0] == 'F':
            supported_keys = ['fmt']
            diff = set(keyedopts).difference(supported_keys)
            if diff:
                raise ParseError(
                    'unsupported options on line %d: %s'
                    % (lineno, ', '.join(sorted(diff))))
            if keyedopts:
                fmt = keyedopts['fmt']
                regex, newfmt = self._expand_datefmt(fmt)
                return rules.FileTimeMatch(start, end, regex, newfmt)
            # Without this the method returned None, which the caller
            # would treat as "no condition".
            raise ParseError(
                'option F requires a fmt option on line %d' % lineno)

        return rules.CurrentTimeMatch(start, end)

    def p_datespec_1(self, p):
        '''datespec : datetime DASH datetime
                    | date DASH date
                    | datetime DASH date
                    | date DASH datetime'''
        p[0] = rules.CurrentTimeMatch(_to_datetime(p[1]),
                                      _to_datetime(p[3]))

    def p_datespec_2(self, p):
        '''datespec : datetime DASH datetime condopts
                    | date DASH date condopts
                    | datetime DASH date condopts
                    | date DASH datetime condopts'''
        # p.lineno is a method taking a symbol index; the DASH token at
        # index 2 always carries a line number.
        p[0] = self._gen_datematcher(_to_datetime(p[1]),
                                     _to_datetime(p[3]),
                                     p[4],
                                     p.lineno(2))

    def p_datespec_3(self, p):
        '''datespec : datecompare datetime
                    | datecompare date'''
        start, end = self._resolve_datecompare(p[1], p[2])
        p[0] = rules.CurrentTimeMatch(start, end)

    def p_datespec_4(self, p):
        '''datespec : datecompare datetime condopts
                    | datecompare date condopts'''
        start, end = self._resolve_datecompare(p[1], p[2])
        # NOTE(review): every symbol here is a nonterminal, so the line
        # number may be 0 unless position tracking is enabled - confirm.
        p[0] = self._gen_datematcher(start, end, p[3], p.lineno(1))

    def p_condopts(self, p):
        '''condopts : LBRACK condopt_list RBRACK'''
        # Split the options into positional names and key=value pairs.
        pos = []
        keyed = {}
        for opt in p[2]:
            if isinstance(opt, tuple):
                k, v = opt
                if k in keyed:
                    raise ParseError(
                        "duplicate option on line %d: %s"
                        % (p.lineno(1), k))
                keyed[k] = v
            else:
                pos.append(opt)
        p[0] = (pos, keyed)

    def p_condopt_list(self, p):
        '''condopt_list : condopt
                        | condopt COMMA condopt_list'''
        if len(p) == 2:
            p[0] = [p[1]]
        else:
            p[0] = [p[1]] + p[3]

    def p_condopt_1(self, p):
        '''condopt : BAREWORD OP BAREWORD'''
        # The lexer has one OP token for all comparison operators; only
        # '=' is meaningful inside an option list.
        op = p[2]
        if op != '=':
            raise ParseError(
                "expected '=' in condition options on line %d, got %s"
                % (p.lineno(2), op))
        p[0] = (p[1], p[3])

    def p_condopt_2(self, p):
        '''condopt : BAREWORD'''
        p[0] = p[1]

    def p_datecompare(self, p):
        'datecompare : OP'
        p[0] = p[1]

    def p_date(self, p):
        'date : DATE'
        p[0] = p[1]

    def p_datetime(self, p):
        'datetime : DATETIME'
        p[0] = p[1]

    def _resolve_datecompare(self, op, date):
        """Turn a comparison operator and a date into a (start, end) pair.

        ``=`` spans from the converted date to 23:59:59 on the same day;
        ``<=`` and ``>=`` are open-ended on one side and inclusive on the
        other; ``<`` and ``>`` exclude the given instant by one
        microsecond.  Dates are converted with _to_datetime.
        """
        if op == '=':
            return (_to_datetime(date),
                    datetime.datetime(date.year,
                                      date.month,
                                      date.day,
                                      23,
                                      59,
                                      59))
        elif op == '<=':
            return (datetime.datetime.min,
                    _to_datetime(date))
        elif op == '>=':
            return (_to_datetime(date),
                    datetime.datetime.max)
        elif op == '<':
            return (datetime.datetime.min,
                    _to_datetime(date) - datetime.timedelta(microseconds=1))
        elif op == '>':
            return (_to_datetime(date) + datetime.timedelta(microseconds=1),
                    datetime.datetime.max)
        else:
            # explicit raise so the check survives "python -O"
            raise AssertionError("not reached")
484
def _to_datetime(dt):
    """Return ``dt`` as a datetime.datetime.

    A datetime.datetime is returned unchanged; a plain datetime.date is
    converted to midnight on that day; anything else is passed through.
    """
    # datetime.datetime is a subclass of datetime.date, so it has to be
    # tested first or the time of day would be discarded.
    if isinstance(dt, datetime.datetime):
        return dt
    if isinstance(dt, datetime.date):
        return datetime.datetime(dt.year,
                                 dt.month,
                                 dt.day,
                                 0,
                                 0,
                                 0)
    return dt
494
def parse_string(s, debug=False):
    """Parse rule configuration text and return the parser's result.

    When ``debug`` is true the module logger is set to DEBUG, logging is
    sent to stderr, and the flag is passed on to the parser.  When the
    module-level PARSEDEBUG is set, the parser is built unoptimized with
    debugging on and its tables are written into the package directory;
    otherwise no tables are written.
    """
    if debug:
        logger.setLevel(logging.DEBUG)
        logging.basicConfig(stream=sys.stderr)
    p = RuleParser()
    lexer = lex.lex(module=p)
    if PARSEDEBUG:
        yaccopts = dict(optimize=False,
                        debug=True,
                        tabmodule='_ruletab',
                        # directory of this module
                        outputdir=resource_filename('ewa.__init__', ''))
    else:
        yaccopts = dict(write_tables=False,
                        debug=False,
                        tabmodule='ewa._ruletab')
    parser = yacc.yacc(module=p,
                       **yaccopts)
    return parser.parse(s, lexer=lexer, debug=debug)
514
def parse_file(fp, debug=False):
    """Parse rules from ``fp`` and return the result of parse_string.

    ``fp`` is either an object with a ``read`` method, which is read but
    left open for the caller, or a path, which is opened here and closed
    again once it has been parsed.
    """
    if hasattr(fp, 'read'):
        return parse_string(fp.read(), debug=debug)
    opened = open(fp)
    try:
        return parse_string(opened.read(), debug=debug)
    finally:
        # we opened it, so we close it
        opened.close()
519
def lex_file(fp, debug=False):
    """Tokenize ``fp`` with lex_string, which prints each token.

    ``fp`` is either an object with a ``read`` method, which is read but
    left open for the caller, or a path, which is opened here and closed
    again afterwards.
    """
    if hasattr(fp, 'read'):
        return lex_string(fp.read(), debug)
    opened = open(fp)
    try:
        return lex_string(opened.read(), debug)
    finally:
        # we opened it, so we close it
        opened.close()
524
def lex_string(s, debug=False):
    """Tokenize ``s`` and print every token, one per line, to stdout.

    When ``debug`` is true the module logger is set to DEBUG and logging
    is sent to stderr.
    """
    if debug:
        logger.setLevel(logging.DEBUG)
        logging.basicConfig(stream=sys.stderr)
    p = RuleParser()
    lexer = lex.lex(module=p)
    lexer.input(s)
    while True:
        tok = lexer.token()
        if not tok:
            break
        # single-argument call form: same output on Python 2 and 3
        print(tok)
if __name__ == '__main__':
    # Command-line use: "-lex FILE" prints the token stream, "-parse FILE"
    # prints the parsed rules as JSON; anything else prints usage to
    # stderr and exits with status 1.
    args = sys.argv[1:]
    if len(args) == 2:
        if args[0] == '-lex':
            lex_file(args[1], True)
            sys.exit(0)
        elif args[0] == '-parse':
            thing = parse_file(args[1], True)
            print(rules.to_json(thing))
            sys.exit(0)

    prog = os.path.basename(sys.argv[0])
    sys.stderr.write('usage: %s [-lex | -parse] file\n' % prog)
    sys.exit(1)