Module XPathProcessor
[hide private]
[frames | no frames]

Source Code for Module XPathProcessor

  1   
  2  from configParser import C3Object 
  3  from utils import elementType, getFirstData, verifyXPaths 
  4   
class XPathProcessor(C3Object):
    """Evaluate configured XPaths against records.

    Each ``<source>`` configuration node contributes one list of XPath
    descriptions to ``self.sources``.  A description is a dict with keys:

    * ``'xpath'``  -- first item returned by ``verifyXPaths`` for the
      text of the ``<xpath>`` element
    * ``'schema'`` -- value of the element's ``schema`` attribute, or ``''``
    * ``'maps'``   -- namespace prefix -> URI mapping collected from the
      element's ``xmlns:*`` attributes
    """

    # Class-level default; __init__ rebinds a fresh list on every instance.
    sources = []

    def _handleConfigNode(self, session, node):
        """Collect the XPath descriptions held by a ``<source>`` node.

        Nodes whose ``localName`` is not ``"source"`` are ignored.  For a
        ``<source>`` node, one description dict is built per ``<xpath>``
        child element and the resulting list is appended to
        ``self.sources`` (an empty list is appended when there are no
        ``<xpath>`` children).

        :param session: session object; not used by this method
        :param node: DOM node taken from the object's configuration
        """
        if node.localName != "source":
            return

        xpaths = []
        for child in node.childNodes:
            if child.nodeType != elementType or child.localName != "xpath":
                continue

            data = {'schema': '', 'xpath': None, 'maps': {}}
            xp = getFirstData(child)
            data['xpath'] = verifyXPaths([xp])[0]

            # keys() is called explicitly rather than iterating the
            # attribute map itself -- NOTE(review): DOM attribute maps are
            # not guaranteed to be directly iterable; confirm before
            # simplifying.
            for a in child.attributes.keys():
                # ConfigStore using 4Suite: the key is a tuple rather than
                # a string, so resolve it to the attribute node's name.
                if isinstance(a, tuple):
                    a = child.attributes[a].name

                if a.startswith("xmlns:"):
                    # Namespace declaration: record prefix -> URI.
                    pref = a[6:]
                    uri = child.getAttributeNS(
                        'http://www.w3.org/2000/xmlns/', pref)
                    if not uri:
                        # Fall back to a plain lookup by qualified name.
                        uri = child.getAttribute(a)
                    data['maps'][pref] = uri
                elif a == 'schema':
                    data['schema'] = child.getAttributeNS(None, 'schema')

            xpaths.append(data)

        self.sources.append(xpaths)

    def __init__(self, session, config, parent):
        """Reset per-instance state, then run the base-class constructor.

        ``self.sources`` is rebound before ``C3Object.__init__`` runs --
        presumably because the base constructor feeds configuration nodes
        to ``_handleConfigNode``; confirm against ``C3Object``.
        """
        self.sources = []
        self.schema = ""
        C3Object.__init__(self, session, config, parent)

    def process_record(self, session, record):
        """Extract XPath and return values.

        Every configured XPath is passed, with its namespace map, to
        ``record.process_xpath``.  An XPath that names a schema is skipped
        when that schema differs from ``record.schema``.

        :param session: session object; not used by this method
        :param record: object providing ``schema`` and ``process_xpath``
        :return: list of the ``record.process_xpath`` results, in
            configuration order
        """
        vals = []
        for src in self.sources:
            # Each source is a list of description dicts.
            for xp in src:
                if xp['schema'] and record.schema != xp['schema']:
                    continue
                vals.append(record.process_xpath(xp['xpath'], xp['maps']))
        return vals
51 52 53 # two xpaths, span between them
54 -class SpanExtractor(XPathProcessor):
55
56 - def process_record(self, session, record):
57 endTag = src[1][-1][0][1] 58 for r in raw: 59 start = int(r[-1][r[-1].rfind(' ')+1:]) 60 comp = [rec.sax[start]] 61 startTag = rec._convert_elem(comp[0])[0] 62 usingNs = comp[0][0] 63 n = 0 64 okay = 1 65 saxlen = len(rec.sax) -1 66 openTags = [] 67 while okay and start + n < saxlen: 68 n += 1 69 line = rec.sax[start+n] 70 if(line[0] in ['1', '4']): 71 # Check it 72 if (rec._checkSaxXPathLine(src[1][-1], start + n)): 73 # Matched end 74 okay = 0 75 else: 76 # Add tags to close 77 if line[0] == '4': 78 end = line.rfind("}") 79 stuff = eval(line[2:end+1]) 80 ns, tag = stuff[0], stuff[1] 81 openTags.append((ns, tag)) 82 else: 83 openTags.append(rec._convert_elem(line)[0]) 84 comp.append(line) 85 elif (line[0] in ['2', '5']): 86 # check we're open 87 if (line[0] == '2'): 88 end = line.rfind(' ') 89 tag = line[2:end] 90 else: 91 tag = eval(line[2:line.rfind(',')])[0:2] 92 if ((n == 1 and tag[1] == startTag) or (openTags and openTags[-1] == tag)): 93 comp.append(line) 94 if openTags: 95 openTags.pop(-1) 96 elif (line[0] == '3'): 97 comp.append(line) 98 if (openTags): 99 openTags.reverse() 100 for o in openTags: 101 if usingNs == '1': 102 comp.append("2 %s" % o) 103 else: 104 comp.append("5 u'%s', u'%s', u'', None" % o)
105