1
2 from baseObjects import Record
3 from c3errors import C3Exception
4 import types, utils, os, re
5 from Ft.Xml.Domlette import implementation, Print
6 from cStringIO import StringIO
7 from xml.sax.saxutils import escape
8 from PyZ3950.zmarc_relaxed import MARC
9 from xml.sax import ContentHandler
10
11 from utils import Context, flattenTexts
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class SaxContentHandler(ContentHandler):
    """SAX handler that flattens a document into a list of event strings.

    Each SAX callback appends one string to ``currentText``; the index of
    that string in the list is the event's "line" (``currentLine``).  The
    first character of every string identifies the event:

        1  startElement     "1 name attrs parentLine siblingIndex endLine"
        2  endElement       "2 name startLine"
        3  characters       "3 text"
        4  startElementNS   "4 uri, local, qname, attrs parentLine siblingIndex endLine"
        5  endElementNS     "5 uri, local, qname startLine"
        6  startPrefixMapping "6 prefix, uri"

    The trailing ``endLine`` of a start event is appended when the matching
    end event arrives.  ``elementHash`` maps each element name (and each
    configured ``name[@attr='value']`` / ``*[@attr='value']`` key) to a list
    of ``[startLine, endLine]`` pairs.
    """

    # Class-level defaults.  __init__() and reinit() rebind every one of
    # these on the instance, so the mutable objects here are not shared by
    # instances that were constructed normally.
    currentText = []
    currentPath = []
    pathLines = []
    currentLine = -1
    recordSize = 0
    elementHash = {}
    namespaces = []
    hashAttributesNames = {}
    hashAttributes = []
    stripWS = 0
    saveElementIndexes = 1

    def __init__(self):
        """Set the configuration defaults and start with empty parse state."""
        ContentHandler.__init__(self)
        self.saveElementIndexes = 1
        # element name (or '*') -> list of attribute names to index
        self.hashAttributesNames = {}
        # stack of (startLine, [hash keys]) for elements still open
        self.hashAttributes = []
        self.stripWS = 0
        self.reinit()

    def reinit(self):
        """Reset all per-document state so the handler can be reused."""
        self.currentText = []
        self.currentPath = []
        self.pathLines = []
        self.currentLine = -1
        self.recordSize = 0
        self.elementHash = {}
        self.elementIndexes = []
        self.namespaces = []

    # --- private helpers shared by the plain and namespace-aware events ---

    def _parentLine(self):
        """Return the start line of the enclosing element, or -1 at the root.

        Must be called after the current element's own line has been pushed
        onto ``pathLines``.
        """
        if len(self.pathLines) > 1:
            return self.pathLines[-2]
        return -1

    def _nextSiblingIndex(self, name):
        """Return the 1-based count of `name` among its siblings so far.

        Also pushes a fresh counter dict for the children of the element
        that is being opened.
        """
        if not self.elementIndexes:
            # First element seen since reinit(): create the root-level counter
            self.elementIndexes = [{}]
        siblings = self.elementIndexes[-1]
        npred = siblings.get(name, 0) + 1
        siblings[name] = npred
        self.elementIndexes.append({})
        return npred

    def _rememberHashAttributes(self, name, attrHash):
        """Queue the attribute-based hash keys configured for this element.

        Keys are built for attributes listed under `name` and under '*' in
        ``hashAttributesNames`` that are actually present in `attrHash`.
        They are stored with the element's start line and written into
        ``elementHash`` when the element is closed.
        """
        saveAttrs = []
        for a in (self.hashAttributesNames.get(name) or ()):
            if a in attrHash:
                saveAttrs.append("%s[@%s='%s']" % (name, a, attrHash[a]))
        for a in (self.hashAttributesNames.get('*') or ()):
            if a in attrHash:
                saveAttrs.append("*[@%s='%s']" % (a, attrHash[a]))
        if saveAttrs:
            self.hashAttributes.append((self.currentLine, saveAttrs))

    def _recordSpan(self, name, start):
        """Finish an element: patch its start event and index its span.

        Appends the current (end) line to the start event string, pops the
        child counter pushed when the element was opened, and records
        ``[start, currentLine]`` under `name` and under any hash keys queued
        for this element.
        """
        self.currentText[start] = "%s %d" % (self.currentText[start],
                                              self.currentLine)
        self.elementIndexes.pop()
        self.elementHash.setdefault(name, []).append(
            [start, self.currentLine])
        # Only the innermost queued entry can belong to this element
        if self.hashAttributes and self.hashAttributes[-1][0] == start:
            for sa in self.hashAttributes.pop()[1]:
                self.elementHash.setdefault(sa, []).append(
                    [start, self.currentLine])

    # --- SAX ContentHandler interface ---

    def startPrefixMapping(self, pfx, uri):
        """Record a namespace prefix mapping; a None prefix is stored as ''."""
        self.currentLine += 1
        if pfx is None:
            pfx = ''
        self.currentText.append("6 %r, %r" % (pfx, uri))

    def startElement(self, name, attrs):
        """Record the start of a non-namespaced element.

        Attribute values are XML-escaped before being stored.
        """
        self.currentLine += 1
        self.pathLines.append(self.currentLine)
        parent = self._parentLine()

        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = escape(attrs[k])

        npred = self._nextSiblingIndex(name)
        self.currentText.append("1 %s %s %d %d"
                                % (name, repr(attrHash), parent, npred))
        self._rememberHashAttributes(name, attrHash)

    def endElement(self, name):
        """Record the end of a non-namespaced element and index its span."""
        self.currentLine += 1
        start = self.pathLines.pop()
        self.currentText.append("2 %s %d" % (name, start))
        self._recordSpan(name, start)

    def startElementNS(self, name, qname, attrs):
        """Record the start of a namespaced element.

        `name` is a ``(namespaceURI, localName)`` pair; counters and hashes
        are keyed on the local name only.  Unlike startElement(), attribute
        values are stored as given (not escaped).
        """
        self.currentLine += 1
        self.pathLines.append(self.currentLine)
        parent = self._parentLine()

        attrHash = {}
        if attrs:
            for k in attrs.keys():
                attrHash[k] = attrs[k]

        simpleName = name[1]
        npred = self._nextSiblingIndex(simpleName)
        self.currentText.append(
            "4 %r, %r, %r, %r %d %d"
            % (name[0], simpleName, qname, attrHash, parent, npred))
        self._rememberHashAttributes(simpleName, attrHash)

    def endElementNS(self, name, qname):
        """Record the end of a namespaced element and index its span."""
        self.currentLine += 1
        start = self.pathLines.pop()
        self.currentText.append("5 %r, %r, %r %d"
                                % (name[0], name[1], qname, start))
        self._recordSpan(name[1], start)

    def characters(self, text, start=0, length=-1):
        """Record a run of character data.

        `start` and `length` are accepted but not used.  When ``stripWS`` is
        set, whitespace-only runs are dropped.  ``recordSize`` grows by the
        number of whitespace-separated words in the run.
        """
        if self.stripWS and text.isspace():
            return
        # No previous event yet (text delivered first): treat as "no
        # preceding text run" instead of failing on an empty list.
        prev = ""
        if self.currentText:
            prev = self.currentText[-1]
        self.currentLine += 1
        if (len(text) != 1 and len(prev) != 3 and prev[:1] == "3"
                and prev[-1:] not in (' ', '-')):
            # Previous event was a multi-character text run that did not end
            # in a space or hyphen: separate the two runs with a space.
            text = ' ' + text
        self.currentText.append("3 %s" % (text))
        self.recordSize += len(text.split())

    def ignorableWhitespace(self, ws):
        """Ignore ignorable whitespace."""
        pass

    def processingInstruction(self, target, data):
        """Ignore processing instructions."""
        pass

    def skippedEntity(self, name):
        """Ignore skipped entities."""
        pass
226
286
287 s2dhandler = SaxToDomHandler()
288
290 xml = []
291 currNs = 0
292 newNamespaces = {}
293
299
303
305 attrs = []
306 for a in attribs:
307 attrs.append('%s="%s"' % (a, attribs[a]))
308 attribtxt = ' '.join(attrs)
309 if (attribtxt):
310 attribtxt = " " + attribtxt
311 self.xml.append("<%s%s>" % (name, attribtxt))
312
315
317 if (not ns):
318 return ""
319 pref = self.namespaces.get(ns, None)
320 if (pref == None):
321 self.currNs += 1
322 pref = "ns%d" % (self.currNs)
323 self.namespaces[ns] = pref
324 self.newNamespaces[pref] = ns
325 return pref
326
328 pref = self._getPrefix(n[0])
329 if (pref):
330 name = "%s:%s" % (pref, n[1])
331 else:
332 name = n[1]
333 attrlist = []
334 for ns,aname in attrs:
335 p2 = self._getPrefix(ns)
336 if (p2):
337 nsaname = "%s:%s" % (p2, aname)
338 else:
339 nsaname = aname
340 attrlist.append('%s="%s"' % (nsaname, attrs[(ns,aname)]))
341 for x in self.newNamespaces.items():
342 if (x[0]):
343 attrlist.append('xmlns:%s="%s"' % (x[0], x[1]))
344 else:
345 attrlist.append('xmlns="%s"' % (x[1]))
346 self.newNamespaces = {}
347 attribtxt = ' '.join(attrlist)
348 if (attribtxt):
349 attribtxt = " " + attribtxt
350 self.xml.append("<%s%s>" % (name,attribtxt))
351
359
363
365 return ''.join(self.xml)
366
367
368 s2xhandler = SaxToXmlHandler()
369
370
372 pass
373
374
376 context = None
377 size = 0
378
379 - def __init__(self, domNode, xml="", docid=None):
386
387
389 pass
390
401
404
406 useNamespace = 1
407
409 if (self.xml):
410 return self.xml
411 else:
412 self.xml = self.dom.toxml()
413 return self.xml
414
420
422 if (node.nodeType == utils.elementType):
423 name = node.localName
424 ns = node.namespaceURI
425 attrHash = {}
426 for ai in range(node.attributes.length):
427 attr = node.attributes.item(ai)
428 if self.namespaces:
429 if attr.namespaceURI == 'http://www.w3.org/2000/xmlns/':
430 self.handler.startPrefixMapping(attr.localName, attr.value)
431 else:
432 attrHash[(attr.namespaceURI, attr.localName)] = attr.value
433 else:
434 attrHash[attr.localName] = attr.value
435 if self.namespaces:
436 self.handler.startElementNS((node.namespaceURI, node.localName), None, attrHash)
437 else:
438 self.handler.startElement(node.localName, attrHash)
439 for c in node.childNodes:
440 self._walk(c)
441 if self.namespaces:
442 self.handler.endElementNS((node.namespaceURI, node.localName), None)
443 else:
444 self.handler.endElement(node.localName)
445 elif node.nodeType == utils.textType:
446 self.handler.characters(node.data)
447
449
450 raise NotImplementedError
451
453
463
464
466 xp = tuple[0]
467 if (not self.context):
468 self.context = Context.Context(self.dom)
469 return xp.evaluate(self.context)
470
471
472 try:
473 from lxml import etree, sax
474 class LxmlRecord(DomRecord):
475
476 def __repr__(self):
477 if self.recordStore != None:
478 return "%s/%s" % (self.recordStore, self.id)
479 else:
480 return "Record-%d" % self.id