Module transformer
[hide private]
[frames | no frames]

Source Code for Module transformer

  1   
  2  from configParser import C3Object 
  3  from baseObjects import Transformer 
  4  import os.path, time, utils, types 
  5  from document import StringDocument 
  6  from c3errors import ConfigFileException 
  7   
  8  from Ft.Xml.Xslt.Processor import Processor 
  9  from Ft.Xml import InputSource 
 10  from Ft.Xml.Domlette import ConvertDocument 
 11  from PyZ3950 import z3950, grs1 
 12  from PyZ3950.zmarc import MARC 
 13   
 14  from utils import verifyXPaths, saxToString 
 15  from utils import nonTextToken 
 16  from utils import elementType, flattenTexts, verifyXPaths 
 17   
 18   
class FilepathTransformer(Transformer):
    """Emit the record's identifier as raw SAX events.

    Intended for use as the inTransformer of a recordStore.
    """

    def process_record(self, session, rec):
        """Return a StringDocument wrapping ``rec.id`` in an ``identifier`` element.

        The three event strings are joined with ``nonTextToken``.
        """
        # Presumably the "1"/"3"/"2" prefixes are the start-element /
        # characters / end-element event codes -- confirm against the
        # SAX serialisation used by the record classes.
        events = (
            '1 identifier {}',
            '3 ' + str(rec.id),
            '2 identifier',
        )
        return StringDocument(nonTextToken.join(events))
# Simplest transformation ...
class XmlTransformer(Transformer):
    """Pass the record's raw XML string through unchanged."""

    def process_record(self, session, rec):
        """Return a StringDocument holding the result of ``rec.get_xml()``."""
        raw_xml = rec.get_xml()
        return StringDocument(raw_xml)
# --- XSLT Transformers ---

try:
    from lxml import etree
except ImportError:
    # lxml is an optional dependency: when it is missing the class below is
    # simply not defined.  Only ImportError is tolerated so that genuine
    # errors are no longer silently discarded.
    pass
else:
    class LxmlXsltTransformer(Transformer):
        """XSLT transformer using the lxml implementation. Requires LxmlRecord."""

        def __init__(self, session, config, parent):
            """Compile the stylesheet found at defaultPath/xsltPath."""
            Transformer.__init__(self, session, config, parent)
            xfrPath = self.get_path(session, "xsltPath")
            dfp = self.get_path(session, "defaultPath")
            path = os.path.join(dfp, xfrPath)
            # Parse once at construction; the compiled transform is reused
            # for every record.
            self.txr = etree.XSLT(etree.parse(path))

        def process_record(self, session, rec):
            """Apply the stylesheet to ``rec.get_dom()`` and return a StringDocument."""
            result = self.txr(rec.get_dom())
            return StringDocument(str(result))
class XsltTransformer(Transformer):
    """4Suite based XSLT transformer."""

    def __init__(self, session, config, parent):
        """Load the stylesheet at defaultPath/xsltPath into a 4Suite Processor."""
        Transformer.__init__(self, session, config, parent)
        xfrPath = self.get_path(session, "xsltPath")
        dfp = self.get_path(session, "defaultPath")
        path = os.path.join(dfp, xfrPath)
        # Keep the handle only as long as the stylesheet is being read so it
        # is not leaked for the lifetime of the transformer.
        stream = open(path)
        try:
            xfr = InputSource.DefaultFactory.fromStream(stream, "file://" + path)
            processor = Processor()
            processor.appendStylesheet(xfr)
        finally:
            stream.close()
        self.processor = processor

    def process_record(self, session, rec):
        """Run the stylesheet over the record's DOM and return a StringDocument.

        Raises PermissionException when a handler is registered for
        'info:srw/operation/2/transform' and the session has no user, or the
        handler refuses the user.
        """
        p = self.permissionHandlers.get('info:srw/operation/2/transform', None)
        if p:
            # The module header only imports ConfigFileException from
            # c3errors; import here so the denial path raises the intended
            # exception rather than a NameError.
            from c3errors import PermissionException
            if not session.user:
                raise PermissionException("Authenticated user required to transform using %s" % self.id)
            if not p.hasPermission(session, session.user):
                raise PermissionException("Permission required to transform using %s" % self.id)
        dom = rec.get_dom()
        result = self.processor.runNode(dom, u'')
        return StringDocument(result, self.id, rec.processHistory, parent=rec.parent)
# --- Text, CSV Transformers ---
class CSVTransformer(Transformer):
    """Create simple CSV format from indexes specified."""

    def _handleConfigNode(self, session, node):
        """Read a <fields> config node into ``self.fields``.

        Expected layout:
            fields
                path type=index|workflow|XPathProcessor ref=id
        The result is an ordered list of (type, object) pairs, where the
        object is looked up with ``self.get_object(session, ref)``.

        Raises ConfigFileException for a path whose type is not allowed.
        """
        # DOM attribute is ``localName`` (as used by GrsMapTransformer).
        if node.localName == "fields":
            fields = []
            for child in node.childNodes:
                if child.nodeType != elementType or child.localName != "path":
                    continue
                otype = child.getAttributeNS(None, 'type')
                if otype not in ('index', 'workflow', 'XPathProcessor'):
                    raise ConfigFileException("'%s' type not allowed for CSVTransformer %s (index, workflow, XPathProcessor)" % (otype, self.id))
                ref = child.getAttributeNS(None, 'ref')
                obj = self.get_object(session, ref)
                # Store the configured type string, not the ``type`` builtin.
                fields.append((otype, obj))
            self.fields = fields

    def __init__(self, session, config, parent):
        """Start with no fields, then let the base class process the config."""
        self.fields = []
        Transformer.__init__(self, session, config, parent)

    def process_record(self, session, rec):
        """Return a StringDocument with one comma separated line for *rec*.

        A field that yields no value is written as '?'.
        """
        # simple comma separated format
        data = []
        for xp in self.fields:
            # NOTE(review): self.fields holds (type, object) pairs built by
            # _handleConfigNode, yet each entry is handed to process_xpath
            # unchanged.  The intended per-type dispatch is not visible
            # here -- confirm before relying on this method.
            try:
                data.append(saxToString(rec.process_xpath(xp)[0]))
            except IndexError:
                # Missing Value
                data.append('?')
        return StringDocument(','.join(data))
# --- GRS1 Transformers for Z39.50 ---
class GRS1Transformer(Transformer):
    """Create representation of the XML tree in Z39.50's GRS1 format.

    The instance acts as the handler passed to ``rec.saxify`` and keeps the
    tree under construction in ``self.top`` / ``self.nodeStack``.
    """

    def initState(self):
        """Reset the tree under construction."""
        self.top = None
        self.nodeStack = []

    def startElement(self, name, attribs):
        """Open a subtree element named *name* and attach its attributes."""
        node = z3950.TaggedElement()
        node.tagType = 3
        node.tagValue = ('string', name)
        node.content = ('subtree', [])

        for a in attribs:
            # Attributes become child elements flagged through metadata
            anode = z3950.TaggedElement()
            md = z3950.ElementMetaData()
            anode.tagType = 3
            anode.tagValue = ('string', a)
            md.message = 'attribute'
            anode.metaData = md
            anode.content = ('octets', attribs[a])
            node.content[1].append(anode)

        if self.nodeStack:
            self.nodeStack[-1].content[1].append(node)
        else:
            # First element seen: it is the root of the result
            self.top = node
        self.nodeStack.append(node)

    def endElement(self, foo):
        """Close the current element; an element with no children is marked empty."""
        if self.nodeStack[-1].content[1] == []:
            self.nodeStack[-1].content = ('elementEmpty', None)
        self.nodeStack.pop()

    def characters(self, text, zero, length):
        """Append *text* as a child of the current element.

        Whitespace-only text is collapsed to a single space.  The *zero* and
        *length* arguments are accepted but not used.
        """
        if self.nodeStack:
            if text.isspace():
                text = " "
            node = z3950.TaggedElement()
            node.tagType = 2
            node.tagValue = ('numeric', 19)
            node.content = ('octets', text)
            self.nodeStack[-1].content[1].append(node)

    def process_record(self, session, rec):
        """Build the GRS1 tree for *rec* and return it in a StringDocument.

        Raises PermissionException when a handler is registered for
        'info:srw/operation/2/transform' and the session has no user, or the
        handler refuses the user.
        """
        p = self.permissionHandlers.get('info:srw/operation/2/transform', None)
        if p:
            # The module header only imports ConfigFileException from
            # c3errors; import here so the denial path raises the intended
            # exception rather than a NameError.
            from c3errors import PermissionException
            if not session.user:
                raise PermissionException("Authenticated user required to transform using %s" % self.id)
            if not p.hasPermission(session, session.user):
                raise PermissionException("Permission required to transform using %s" % self.id)
        self.initState()
        rec.saxify(self)
        return StringDocument(self.top, self.id, rec.processHistory, parent=rec.parent)
184 185
class GrsMapTransformer(Transformer):
    """Create a particular GRS1 instance, based on a configured map of XPath to GRS1 element."""

    def _handleConfigNode(self, session, node):
        """Read a <transform> config node into ``self.tagset`` and ``self.maps``.

        Each <map> child contributes one [source, tag] entry taken from its
        first two <xpath> children.  A source starting with '#' is a special
        token and is stored as-is; anything else goes through verifyXPaths.

        Raises ConfigFileException when a <map> lacks two non-empty <xpath>
        children.
        """
        if node.localName == "transform":
            self.tagset = node.getAttributeNS(None, 'tagset')
            maps = []
            for child in node.childNodes:
                if child.nodeType == elementType and child.localName == "map":
                    pair = []
                    for xpchild in child.childNodes:
                        if xpchild.nodeType == elementType and xpchild.localName == "xpath":
                            pair.append(flattenTexts(xpchild))
                    if len(pair) < 2 or not pair[0]:
                        # Report the config problem instead of a bare IndexError
                        raise ConfigFileException("map in GrsMapTransformer %s requires two non-empty xpath elements" % self.id)
                    if pair[0][0] != "#":
                        vxp = verifyXPaths([pair[0]])
                    else:
                        # special case to process
                        vxp = [pair[0]]
                    maps.append([vxp[0], pair[1]])
            self.maps = maps

    def __init__(self, session, config, parent):
        """Start with an empty map and tagset, then process the config."""
        self.maps = []
        self.tagset = ""
        Transformer.__init__(self, session, config, parent)

    def _resolveData(self, session, rec, xpath):
        """Return the value for one map source evaluated against *rec*.

        *xpath* is either a real XPath or one of the special tokens
        #RELEVANCE#, #RAWRELEVANCE#, #DOCID#, #RECORDSTORE#, #PROXINFO# or
        #PARENT#[/path].

        Raises ConfigFileException for an unrecognised special token.
        """
        if xpath[0] != '#':
            data = rec.process_xpath(xpath)
            try:
                data = ' '.join(data)
            except TypeError:
                # data isn't sequence, maybe a string or integer
                pass
            try:
                data = data.encode('utf-8')
            except (AttributeError, TypeError, UnicodeError):
                # Not encodable text: fall back to its string form
                data = str(data)
        elif xpath == '#RELEVANCE#':
            data = rec.resultSetItem.scaledWeight
        elif xpath == '#RAWRELEVANCE#':
            data = rec.resultSetItem.weight
        elif xpath == '#DOCID#':
            data = rec.id
        elif xpath == '#RECORDSTORE#':
            data = rec.recordStore
        elif xpath == '#PROXINFO#':
            data = repr(rec.resultSetItem.proxInfo)
        elif xpath[:8] == '#PARENT#':
            # Get parent docid out of record
            try:
                parent = rec.process_xpath('/c3:component/@parent', {'c3': 'http://www.cheshire3.org/'})[0]
            except IndexError:
                # probably no namespaces
                parent = rec.process_xpath('/c3component/@parent')[0]
            parentStore, parentId = parent.split('/', 1)

            xtrapath = xpath[8:]
            if xtrapath:
                # actually get parent record to get stuff out of
                # TODO: not sure the best way to do this yet :(
                parentRec = self.parent.get_object(session, parentStore).fetch_record(session, parentId)
                if parentRec:
                    # strip leading slash from xtra path data
                    # N.B. double slash needed to root xpath to doc node
                    # (e.g. #PARENT#//root/somenode)
                    data = self._resolveData(session, parentRec, xtrapath[1:])
                else:
                    # Parent could not be fetched: yield no data rather than
                    # failing on an unassigned local.
                    data = ''
            else:
                # by default just return id of parent record
                data = parentId
        else:
            raise ConfigFileException("Unknown special path '%s' in GrsMapTransformer %s" % (xpath, self.id))
        return data

    def process_record(self, session, rec):
        """Return a StringDocument holding one TaggedElement per configured map."""
        elems = []
        for (xpath, tagPath) in self.maps:
            node = z3950.TaggedElement()
            data = self._resolveData(session, rec, xpath)
            node.content = ('string', str(data))
            node.tagType = 2
            node.tagValue = ('numeric', int(tagPath))
            elems.append(node)
        return StringDocument(elems, self.id, rec.processHistory, parent=rec.parent)
269 270 271
class XmlRecordStoreTransformer(Transformer):
    """Wrap the data with the record's metadata.

    For use as inTransformer of a recordStore. Not recommended.
    """

    # Transform a record, return 'string' to dump to database.
    # (String might be a struct in other implementations)

    def process_record(self, session, rec):
        """Return a StringDocument with a <c3:record> wrapper around *rec*.

        The wrapper carries the record's id, status, baseUri, schema,
        schemaType, size, parent, rights, modification history, process
        history and serialised SAX events.  The acting user is
        ``session.user.username``, or 'admin' when there is no session or
        no user.
        """
        create_tmpl = '<c3:modification type="create"><c3:date>%(now)s</c3:date><c3:agent>%(user)s</c3:agent></c3:modification>'

        fields = {
            'id': rec.id,
            'baseUri': rec.baseUri,
            'schema': rec.schema,
            'schemaType': rec.schemaType,
            'status': rec.status,
            'size': rec.size,
        }
        if session is None or session.user is None:
            fields['user'] = 'admin'
        else:
            fields['user'] = session.user.username

        fields['now'] = time.strftime("%Y-%m-%d %H:%M:%S")

        if rec.recordStore is not None and rec.id is not None:
            history = rec.history

            if history:
                # NOTE(review): this appends a 'modified' entry to
                # rec.history (mutating the record) and then also emits a
                # separate type="modify" element, so each update is written
                # twice.  Kept as-is; confirm which of the two is intended.
                history.append((fields['user'], fields['now'], 'modified'))
                histlist = []
                for h in history:
                    histlist.append('<c3:modification type="%s"><c3:date>%s</c3:date><c3:agent>%s</c3:agent></c3:modification>' % (h[2], h[1], h[0]))
                histlist.append('<c3:modification type="modify"><c3:date>%(now)s</c3:date><c3:agent>%(user)s</c3:agent></c3:modification>' % fields)
                histtxt = "\n".join(histlist)
            else:
                histtxt = create_tmpl % fields

            rightslist = []
            for r in rec.rights:
                # "%(user)s": the conversion character was missing, which
                # made the format raise ValueError for any record with rights.
                rightslist.append('<c3:%(userType)s role="%(role)s">%(user)s</c3:%(userType)s>' % {'userType': r[1], 'role': r[2], 'user': r[0]})
            rightstxt = '\n'.join(rightslist)

            saxList = rec.get_sax()
            saxList.append('9 ' + repr(rec.elementHash))
            sax = nonTextToken.join(saxList)

        else:
            histtxt = create_tmpl % fields
            rightstxt = '<c3:agent role="editor">%(user)s</c3:agent>' % fields
            sax = ""

        ph = []
        for item in rec.processHistory:
            ph.append('<c3:object>%s</c3:object>' % (item))

        if rec.parent[0]:
            # rec.parent is formatted as (type, store, id); it must be a tuple
            parent = "<c3:type>%s</c3:type><c3:store>%s</c3:store><c3:id>%d</c3:id>" % rec.parent
        else:
            parent = ""

        fields['parent'] = parent
        fields['processHistory'] = ''.join(ph)
        fields['rights'] = rightstxt
        fields['history'] = histtxt
        fields['sax'] = sax

        xml = u"""<c3:record
xmlns:c3="http://www.cheshire3.org/schemas/record/1.0/">
  <c3:id>%(id)s</c3:id>
  <c3:status>%(status)s</c3:status>
  <c3:baseUri>%(baseUri)s</c3:baseUri>
  <c3:schema>%(schema)s</c3:schema>
  <c3:schemaType>%(schemaType)s</c3:schemaType>
  <c3:size>%(size)d</c3:size>
  <c3:parent>%(parent)s</c3:parent>
  <c3:technicalRights>
    %(rights)s
  </c3:technicalRights>
  <c3:history>
    %(history)s
  </c3:history>
  <c3:processHistory>
    %(processHistory)s
  </c3:processHistory>
  <c3:saxEvents>%(sax)s</c3:saxEvents>
</c3:record>
""" % fields

        return StringDocument(xml)
354