Module workflow
[hide private]
[frames | no frames]

Source Code for Module workflow

  1   
  2  from baseObjects import Workflow, Server 
  3  from configParser import C3Object 
  4  from utils import elementType, flattenTexts 
  5  from types import MethodType 
  6  import traceback, sys 
  7   
  8  from c3errors import C3Exception, ConfigFileException, ObjectDoesNotExistException 
  9   
class WorkflowException(C3Exception):
    """Error raised by workflow code, e.g. when no database is selected."""
12 13
class SimpleWorkflow(Workflow):
    """Default workflow implementation.

    Translates the XML <workflow> configuration to Python source and
    compiles it on object instantiation.  The compiled function is bound to
    the instance as ``process``; each <split> found inside a <fork> is
    compiled into its own bound method.

    SECURITY NOTE: attribute values and text from the configuration are
    interpolated into source code that is then exec'd (see <assign> and
    <log> in particular).  Workflow configurations must therefore come from
    a trusted source only.
    """

    # Source text of the generated ``process`` handler, kept for inspection.
    code = None
    # Counter used to name <split> elements that carry no ``id`` attribute.
    splitN = 0
    # Generated source of every split function, keyed by function name.
    splitCode = {}

    def __init__(self, session, node, parent):
        """Set up per-instance state, then hand over to Workflow.__init__.

        The state is assigned before calling the base initialiser; presumably
        the base class parses ``node`` and calls back into
        ``_handleConfigNode``, which needs ``fnHash`` -- TODO confirm.
        """
        self.splitN = 0
        self.splitCode = {}
        # Default function to call for an <object> that gives a type but
        # no explicit function.
        self.fnHash = {u'preParser': 'process_document',
                       u'parser': 'process_document',
                       u'transformer': 'process_record',
                       u'index': 'store_terms',
                       u'extractor': 'process_xpathResult',
                       u'normalizer': 'process_hash',
                       u'XPathProcessor': 'process_record',
                       u'documentStore': 'create_document',
                       u'recordStore': 'create_record',
                       u'documentFactory': 'load',
                       u'workflow': 'process'}
        Workflow.__init__(self, session, node, parent)
        # Somewhere at the top there must be a server
        self.server = parent

    def _bindGenerated(self, source, definedName, attrName):
        """Compile ``source`` and bind the function it defines to this instance.

        ``definedName`` is the name used in the generated ``def``;
        ``attrName`` is the attribute under which the bound method is stored.
        """
        namespace = {}
        # Execute with this module's globals so that the generated code can
        # resolve WorkflowException and so that every generated function
        # shares the same 'global inRecord'.  The definition is captured in
        # an explicit dict instead of relying on exec mutating locals().
        exec(source, globals(), namespace)
        setattr(self, attrName, MethodType(namespace[definedName], self))

    def _handleConfigNode(self, session, node):
        """Compile a <workflow> element into the ``process`` method.

        Nodes with any other local name are ignored.
        """
        # <workflow>
        if node.localName != "workflow":
            return
        lines = [
            'def handler(self, session, input=None):',
            '    if session.database:',
            '        db = self.server.get_object(session, session.database)',
            '        self.database = db',
            '    else:',
            '        raise WorkflowException("No database")',
        ]
        lines.extend(["    " + s for s in self._handleFlow(node)])
        lines.append('    return input')
        self.code = "\n".join(lines)
        self._bindGenerated(self.code, 'handler', 'process')

    def _handleFlow(self, node):
        """Return the generated source lines for the children of ``node``.

        Lines are returned unindented relative to the enclosing block; the
        caller adds the indentation.

        Raises ConfigFileException for an element name that is not known.
        """
        # Elements that map one-to-one onto a bare Python statement.
        bareStatements = ("break", "continue", "return", "raise")
        code = []
        for child in node.childNodes:
            if child.nodeType != elementType:
                continue
            name = child.localName
            if name == "try":
                code.append("try:")
                code.extend(["    " + s for s in self._handleFlow(child)])
            elif name == "except":
                code.append("except Exception, err:")
                code.extend(["    " + s for s in self._handleFlow(child)])
            elif name in bareStatements:
                code.append(name)
            elif name == "assign":
                source = child.getAttributeNS(None, 'from')
                target = child.getAttributeNS(None, 'to')
                code.append("%s = %s" % (target, source))
            elif name == "for-each":
                code.extend(self._handleForEach(child))
                code.extend(["    " + s for s in self._handleFlow(child)])
            elif name == "object":
                code.extend(self._handleObject(child))
            elif name == "log":
                code.extend(self._handleLog(child))
            elif name == "fork":
                code.extend(self._handleFork(child))
            else:
                raise ConfigFileException("Unknown workflow element: %s" % name)
        return code

    def _handleLog(self, node):
        """Return source lines that log the text content of a <log> element.

        The logger is the object named by the ``ref`` attribute, or the
        database's 'defaultLogger' path when no ref is given.
        """
        code = []
        ref = node.getAttributeNS(None, 'ref')
        if ref:
            code.append("object = db.get_object(session, '%s')" % ref)
        else:
            code.append("object = db.get_path(session, 'defaultLogger')")
        text = flattenTexts(node)
        # Text containing a space that does not start with a double quote is
        # turned into a string literal; anything else is emitted verbatim and
        # so is evaluated as an expression by the generated code.
        if text.find(' ') > -1 and text[0] != '"':
            text = repr(text)
        code.append("object.log(session, str(%s))" % text)
        return code

    def _handleForEach(self, node):
        """Return the loop header lines for a <for-each> element."""
        return ['looped = input', 'for input in looped:']

    def _handleObject(self, node):
        """Return source lines that fetch an object and call a function on it.

        The object is chosen by the ``ref`` attribute, or failing that by the
        ``type`` attribute ('database' and 'input' are special cased).  When
        no ``function`` attribute is given the default for the type is taken
        from ``self.fnHash``.

        Raises ConfigFileException when neither ref nor type is given, and
        KeyError when no function is given and the type has no entry in
        ``self.fnHash``.
        """
        ref = node.getAttributeNS(None, 'ref')
        objType = node.getAttributeNS(None, 'type')
        function = node.getAttributeNS(None, 'function')
        code = []
        if ref:
            code.append("object = db.get_object(session, '%s')" % ref)
        elif objType == 'database':
            code.append("object = db")
        elif objType == 'input':
            code.append("object = input")
        elif objType:
            code.append("object = db.get_path(session, '%s')" % objType)
        else:
            raise ConfigFileException("Could not determine object")
        if not function:
            # Assume most common for object type
            function = self.fnHash[objType]

        # Functions that take the session only.
        singleFunctions = [u'begin_indexing', u'commit_indexing',
                           u'commit_metadata', u'begin_storing',
                           u'commit_storing']

        if function in singleFunctions:
            code.append('object.%s(session)' % function)
        elif objType == 'index' and function == 'store_terms':
            code.append('object.store_terms(session, input, inRecord)')
        elif objType == 'documentFactory' and function == 'load':
            # The "no input" case can only be decided when the workflow
            # runs, so the test is emitted into the generated code.  (It used
            # to be evaluated here against the builtin ``input`` and was
            # therefore never true.)
            code.append('if input is None:')
            code.append('    input = object.load(session)')
            code.append('else:')
            code.append('    result = object.load(session, input)')
            code.append('    if result != None:')
            code.append('        input = result')
        elif objType == 'documentStore':
            # Check for normalizer output
            code.append('if type(input) == {}.__class__:')
            code.append('    for k in input.keys():')
            code.append('        object.%s(session, k)' % function)
            code.append('else:')
            code.append('    object.%s(session, input)' % function)
        elif objType == 'XPathProcessor':
            # Remember the record so a later index store_terms call can
            # pass it along.
            code.append('global inRecord')
            code.append('inRecord = input')
            code.append('input = object.process_record(session, input)')
        else:
            # Keep the current input when the function returns nothing.
            code.append('result = object.%s(session, input)' % function)
            code.append('if result != None:')
            code.append('    input = result')
        return code

    def _handleSplit(self, node):
        """Compile a <split> element into a bound method and return its name.

        The method is called ``split_<id>`` when the element has an ``id``
        attribute, otherwise ``split<N>`` using a running counter.
        """
        # <workflow>
        splitId = node.getAttributeNS(None, 'id')
        if splitId:
            fname = "split_%s" % splitId
        else:
            fname = "split%s" % self.splitN
            self.splitN += 1
        # XXX Can't we just call inputObject input in the def ?
        lines = ['def %s(self, session, inputObject):' % fname,
                 '    input = inputObject',
                 '    db = self.database']
        lines.extend(["    " + s for s in self._handleFlow(node)])
        lines.append('    return input')
        codestr = "\n".join(lines)
        self.splitCode[fname] = codestr
        self._bindGenerated(codestr, fname, fname)
        return fname

    def _handleFork(self, node):
        """Return source lines calling every <split> child of a <fork>.

        Each split receives the current input; its return value is ignored.
        """
        code = []
        for child in node.childNodes:
            if child.nodeType == elementType and child.localName == "split":
                fname = self._handleSplit(child)
                code.append("self.%s(session, input)" % fname)
        return code
198 199
class CachingWorkflow(SimpleWorkflow):
    """Slightly faster workflow implementation that caches the objects.

    Object not to be used in one database and then another database without
    first calling workflow.load_cache(session, newDatabaseObject).
    """

    # Source text of the generated ``process`` handler.
    code = None
    # Counter used to name <split> elements that carry no ``id`` attribute.
    splitN = 0
    # Generated source of every split function, keyed by function name.
    splitCode = {}
    # Objects fetched by load_cache, keyed by the ref used in the config.
    objcache = {}
    # Set of every ref mentioned by <object>/<log> elements.
    objrefs = None
    # Database the cache was loaded from.
    database = None
    # Logger used by <log> elements without a ref.
    defaultLogger = None

    def __init__(self, session, node, parent):
        """Reset the cache state, then run the SimpleWorkflow initialiser.

        ``objrefs`` must exist before the base initialiser runs, since the
        _handle* methods add to it while the configuration is compiled
        (presumably triggered from the base initialiser -- TODO confirm).
        """
        self.objcache = {}
        self.objrefs = set()
        self.database = None
        self.defaultLogger = None
        SimpleWorkflow.__init__(self, session, node, parent)

    def load_cache(self, session, db):
        """Fetch every referenced object from ``db`` and cache it.

        Also records ``db`` as the current database and looks up its
        'defaultLogger' path.

        Raises ObjectDoesNotExistException when a referenced object cannot
        be fetched.
        """
        self.objcache = {}
        self.database = db
        self.defaultLogger = db.get_path(session, 'defaultLogger')
        for ref in self.objrefs:
            obj = db.get_object(session, ref)
            if not obj:
                raise ObjectDoesNotExistException(ref)
            self.objcache[ref] = obj

    def _handleConfigNode(self, session, node):
        """Compile a <workflow> element into the ``process`` method.

        The generated handler loads the object cache on demand whenever the
        cache is empty.  Nodes with any other local name are ignored.
        """
        # <workflow>
        if node.localName != "workflow":
            return
        lines = [
            'def handler(self, session, input=None):',
            "    if not self.objcache:",
            "        db = session.server.get_object(session, session.database)",
            "        self.load_cache(session, db)",
        ]
        lines.extend(["    " + s for s in self._handleFlow(node)])
        lines.append('    return input')
        self.code = "\n".join(lines)
        # Execute with the module globals (so 'global inRecord' is shared by
        # all generated functions) and capture the definition in an explicit
        # dict instead of relying on exec mutating locals().
        namespace = {}
        exec(self.code, globals(), namespace)
        setattr(self, 'process', MethodType(namespace['handler'], self))

    def _handleLog(self, node):
        """Return the source line that logs the text of a <log> element.

        Uses the cached object named by ``ref`` when given, otherwise the
        cached default logger.
        """
        text = flattenTexts(node)
        # Text containing a space that does not start with a double quote is
        # turned into a string literal; anything else is emitted verbatim.
        if text.find(' ') > -1 and text[0] != '"':
            text = repr(text)
        ref = node.getAttributeNS(None, 'ref')
        if ref:
            self.objrefs.add(ref)
            # The ref is a cache key and so must appear as a quoted string in
            # the generated code, exactly as in _handleObject.
            return ["self.objcache['%s'].log(session, str(%s))" % (ref, text)]
        return ["self.defaultLogger.log(session, str(%s))" % (text)]

    def _handleObject(self, node):
        """Return source lines that call a function on a (cached) object.

        Objects given by ``ref`` are registered for caching and read from
        ``self.objcache``; objects given by ``type`` are resolved through the
        database when the generated code runs ('database' and 'input' are
        special cased).  When no ``function`` attribute is given the default
        for the type is taken from ``self.fnHash``.

        Raises ConfigFileException when neither ref nor type is given, and
        KeyError when no function is given and the type has no entry in
        ``self.fnHash``.
        """
        ref = node.getAttributeNS(None, 'ref')
        objType = node.getAttributeNS(None, 'type')
        function = node.getAttributeNS(None, 'function')
        code = []
        if ref:
            self.objrefs.add(ref)
            target = "self.objcache['%s']" % ref
        elif objType == 'database':
            target = "self.database"
        elif objType == 'input':
            target = "input"
        elif objType:
            code.append("obj = self.database.get_path(session, '%s')" % objType)
            target = "obj"
        else:
            raise ConfigFileException("Could not determine object")
        if not function:
            # Assume most common for object type
            function = self.fnHash[objType]

        # Functions that take the session only.
        singleFunctions = [u'begin_indexing', u'commit_indexing',
                           u'commit_metadata', u'begin_storing',
                           u'commit_storing']
        # Functions whose return value must not replace the input.
        noneFunctions = [u'add_record']

        if function in singleFunctions:
            code.append('%s.%s(session)' % (target, function))
        elif function in noneFunctions:
            code.append('%s.%s(session, input)' % (target, function))
        elif objType == 'index' and function == 'store_terms':
            code.append('%s.store_terms(session, input, inRecord)' % target)
        elif objType == 'documentFactory' and function == 'load':
            # The "no input" case can only be decided when the workflow
            # runs, so the test is emitted into the generated code.  (It used
            # to be evaluated here against the builtin ``input`` and was
            # therefore never true.)
            code.append('if input is None:')
            code.append('    input = %s.load(session)' % target)
            code.append('else:')
            code.append('    input = %s.load(session, input)' % target)
        elif objType == 'documentStore':
            # Check for normalizer output
            code.append('if type(input) == {}.__class__:')
            code.append('    for k in input.keys():')
            code.append('        %s.%s(session, k)' % (target, function))
            code.append('else:')
            code.append('    %s.%s(session, input)' % (target, function))
        elif objType == 'XPathProcessor':
            # Remember the record so a later index store_terms call can
            # pass it along.
            code.append('global inRecord')
            code.append('inRecord = input')
            code.append('input = %s.process_record(session, input)' % target)
        else:
            code.append('input = %s.%s(session, input)' % (target, function))
        return code

    def _handleSplit(self, node):
        """Compile a <split> element into a bound method and return its name.

        The method is called ``split_<id>`` when the element has an ``id``
        attribute, otherwise ``split<N>`` using a running counter.
        """
        # <workflow>
        splitId = node.getAttributeNS(None, 'id')
        if splitId:
            fname = "split_%s" % splitId
        else:
            fname = "split%s" % self.splitN
            self.splitN += 1
        lines = ['def %s(self, session, input):' % fname]
        lines.extend(["    " + s for s in self._handleFlow(node)])
        lines.append('    return input')
        codestr = "\n".join(lines)
        self.splitCode[fname] = codestr
        # See _handleConfigNode: module globals plus an explicit namespace.
        namespace = {}
        exec(codestr, globals(), namespace)
        setattr(self, fname, MethodType(namespace[fname], self))
        return fname
328