Module recordStore
[hide private]
[frames] | no frames]

Source Code for Module recordStore

  1   
  2  from configParser import C3Object 
  3  from baseObjects import RecordStore, Record 
  4  from baseStore import SimpleStore, BdbStore 
  5  from record import SaxRecord 
  6  from c3errors import * 
  7  from document import StringDocument 
  8  import time, cPickle 
  9  from utils import nonTextToken 
 10   
 11  # Fastest to pickle elementHash, append to list, then join with nonTextToken 
 12   
 13   
class SimpleRecordStore(RecordStore):
    """Record store that serialises records and passes them to the data-store API.

    Records are written either through the configured ``inTransformer`` or,
    when none is configured, as their raw sax events joined with
    ``nonTextToken`` and followed by a pickled copy of ``elementHash``.
    Reading reverses this using an explicit parser, the configured
    ``outParser``, or the raw sax format.
    """

    # Optional helper objects, resolved from configuration paths in __init__.
    inTransformer = None
    outParser = None
    idNormalizer = None

    def __init__(self, session, node, parent):
        """Initialise the store and resolve its optional helper objects.

        RecordStore.__init__ is only invoked while ``self.paths`` is still
        empty; presumably a sibling base class (see the multiply-inheriting
        stores in this module) may already have run the shared set-up.
        """
        if not self.paths:
            RecordStore.__init__(self, session, node, parent)
        # NOTE(review): the listing this file was recovered from had lost its
        # indentation. These lookups are placed outside the guard so that
        # stores initialised through another base class still get their
        # helpers -- confirm against the original source.
        self.inTransformer = self.get_path(None, 'inTransformer')
        self.outParser = self.get_path(None, 'outParser')
        self.idNormalizer = self.get_path(None, 'idNormalizer')

    def _require_permission(self, session, operation, action):
        """Raise PermissionException unless the session user may run `operation`.

        `operation` is the key looked up in ``self.permissionHandlers``; when
        no handler is registered for it the check is skipped.  `action` is
        the phrase used in the error message, e.g. "create an object in".
        """
        handler = self.permissionHandlers.get(operation, None)
        if not handler:
            return
        if not session.user:
            raise PermissionException(
                "Authenticated user required to %s %s" % (action, self.id))
        if not handler.hasPermission(session, session.user):
            raise PermissionException(
                "Permission required to %s %s" % (action, self.id))

    def _record_from_data(self, session, id, data, parser=None):
        """Turn stored `data` into a record with id and recordStore set.

        An explicit `parser` takes precedence over ``self.outParser``; with
        neither available, `data` is treated as utf-8 encoded raw sax events.
        """
        if parser is None:
            parser = self.outParser
        if parser is not None:
            record = parser.process_document(session, StringDocument(data))
        else:
            # Assume raw sax events
            data = unicode(data, 'utf-8')
            sax = data.split(nonTextToken)
            # Slice rather than index so an empty final event does not raise
            # IndexError.
            if sax[-1][:1] == "9":
                line = sax.pop()
                # SECURITY: unpickling is only safe while the underlying
                # store holds trusted data.
                elemHash = cPickle.loads(str(line[2:]))
            else:
                elemHash = {}
            record = SaxRecord(sax)
            record.elementHash = elemHash
        # Ensure basic required info
        record.id = id
        record.recordStore = self.id
        return record

    def create_record(self, session, record=None):
        """Assign a freshly generated id to `record`, store it and return it.

        When `record` is None an empty placeholder SaxRecord is created.
        Raises PermissionException if the create permission check fails.  If
        storing raises ObjectAlreadyExistsException, ``self.currentId`` is
        decremented before the exception is re-raised.
        """
        self._require_permission(session, 'info:srw/operation/1/create',
                                 "create an object in")
        new_id = self.generate_id(session)
        if record is None:
            # Create a placeholder
            record = SaxRecord([], "", new_id)
        else:
            record.id = new_id
        record.recordStore = self.id
        try:
            self.store_record(session, record)
        except ObjectAlreadyExistsException:
            # Back out id change
            self.currentId -= 1
            raise
        return record

    def replace_record(self, session, record):
        """Store `record` under its existing id after the replace permission check."""
        self._require_permission(session, 'info:srw/operation/1/replace',
                                 "replace an object in")
        self.store_record(session, record)

    def store_record(self, session, record):
        """Serialise `record`, write it via store_data and return the record.

        Unicode ids are encoded to utf-8 first.
        """
        record.recordStore = self.id
        if isinstance(record.id, unicode):
            record.id = record.id.encode('utf-8')

        # Maybe add metadata, etc.
        if self.inTransformer is not None:
            doc = self.inTransformer.process_record(session, record)
            data = doc.get_raw()
        else:
            # Copy so the pickled hash is not appended to the record's own
            # event list, should get_sax() return that list directly.
            sax = list(record.get_sax())
            sax.append("9 " + cPickle.dumps(record.elementHash))
            data = nonTextToken.join(sax)

        self.verify_checkSum(session, record.id, data)
        self.store_data(session, record.id, data, record.size)
        return record

    def fetch_record(self, session, id, parser=None):
        """Fetch the data stored under `id` and return it as a record.

        Raises PermissionException if the retrieve permission check fails and
        FileDoesNotExistException when fetch_data returns nothing.
        """
        self._require_permission(session, 'info:srw/operation/2/retrieve',
                                 "retrieve an object from")
        data = self.fetch_data(session, id)
        if not data:
            raise FileDoesNotExistException()
        return self._record_from_data(session, id, data, parser)

    def delete_record(self, session, id):
        """Delete the item stored under `id`; a Record instance is also accepted."""
        self._require_permission(session, 'info:srw/operation/1/delete',
                                 "delete an object from")
        # XXX This if -sucks-
        # Need to fix workflow to send id, not object
        if isinstance(id, Record):
            id = id.id
        self.delete_item(session, id)

    def fetch_recordSize(self, session, id):
        """Return the result of fetch_size for `id`."""
        return self.fetch_size(session, id)

    def fetch_recordChecksum(self, session, id, parser=None):
        """Return the result of fetch_checksum for `id`; `parser` is unused."""
        return self.fetch_checksum(session, id)

    def process_data(self, session, id, data, parser=None):
        """Build a record from already-fetched `data` without a permission check."""
        return self._record_from_data(session, id, data, parser)
165 166 167 from baseStore import BdbIter
class BdbRecordIter(BdbIter):
    """Iterator that converts each item produced by BdbIter into a record."""

    def next(self):
        """Return the next item, processed by the store's process_data.

        The first two fields of the item are passed as the id and the data;
        no session is supplied (None).
        """
        entry = BdbIter.next(self)
        return self.store.process_data(None, entry[0], entry[1])
175
class BdbRecordStore(BdbStore, SimpleRecordStore):
    """Record store combining BdbStore with the SimpleRecordStore record API."""

    def __init__(self, session, node, parent):
        """Initialise both base classes in turn."""
        # NOTE(review): the constructor body was missing from the listing
        # this file was recovered from. It is reconstructed here by analogy
        # with PostgresRecordStore in this module -- confirm against the
        # original source.
        BdbStore.__init__(self, session, node, parent)
        SimpleRecordStore.__init__(self, session, node, parent)

    def __iter__(self):
        """Return an iterator object over this store."""
        return BdbRecordIter(self)
184 185 try: 186 from baseStore import PostgresStore 187 class PostgresRecordStore(PostgresStore, SimpleRecordStore): 188 def __init__(self, session, node, parent): 189 PostgresStore.__init__(self, session, node, parent) 190 SimpleRecordStore.__init__(self, session, node, parent)
191 except: 192 pass 193 194 195
class ParsingRecordStore(SimpleRecordStore):
    """Record store that keeps data in unparsed format and parses on load.

    Documents come from the configured document store and are turned into
    records by the configured workflow.
    cf buildassoc vs datastore in C2
    """

    documentStore = None
    workflow = None

    def __init__(self, session, config, parent):
        """Initialise the base store, then resolve documentStore and workflow."""
        SimpleRecordStore.__init__(self, session, config, parent)
        self.documentStore = self.get_path(session, 'documentStore')
        self.workflow = self.get_path(session, 'workflow')

    def create_record(self, session, record):
        """Tag `record` with this store and take its id from record.parent[2].

        Raises ValueError when that id is -1.
        """
        record.recordStore = self.id
        record.id = record.parent[2]
        if record.id != -1:
            return record
        raise ValueError

    def fetch_record(self, session, id):
        """Fetch the document for `id`, run it through the workflow, return the record."""
        document = self.documentStore.fetch_document(session, id)
        parsed = self.workflow.process(session, document)
        parsed.recordStore = self.id
        parsed.id = id
        return parsed

    def store_record(self, session, record):
        """Not supported by this store."""
        raise NotImplementedError

    def begin_storing(self, session):
        """Do nothing and return None (open question in the original: should we error?)."""
        return None

    def commit_storing(self, session):
        """Do nothing and return None."""
        return None
234 235
class MarcIter(BdbIter):
    """Iterate over a record store's document store, producing MarcRecords."""

    recordStore = None
    documentStore = None
    workflow = None

    def __init__(self, recStore):
        """Remember `recStore` and its workflow, then iterate its documentStore."""
        self.recordStore = recStore
        self.workflow = recStore.workflow
        BdbIter.__init__(self, recStore.documentStore)

    def next(self):
        """Return the next item as a MarcRecord tagged with its id and store."""
        # item[0] is the id, item[1] is the raw data
        item = BdbIter.next(self)
        marc = MarcRecord(StringDocument(item[1]))
        marc.recordStore = self.recordStore.id
        marc.id = item[0]
        return marc
254 255 256 from record import MarcRecord
class MarcRecordStore(ParsingRecordStore):
    """Parsing record store that builds MarcRecords directly from documents."""

    documentStore = None

    def __iter__(self):
        """Return a MarcIter over this store."""
        return MarcIter(self)

    def fetch_record(self, session, id):
        """Fetch the document for `id` and wrap it in a MarcRecord.

        If the MarcRecord cannot be constructed, an empty SaxRecord is
        returned instead, so callers always receive a record carrying the
        requested id.
        """
        doc = self.documentStore.fetch_document(session, id)
        try:
            rec = MarcRecord(doc)
        except Exception:
            # XXX busted document = no record
            # Catch Exception rather than everything so that interpreter
            # exits and keyboard interrupts are not swallowed.
            rec = SaxRecord([])
        rec.recordStore = self.id
        rec.id = id
        return rec

    def fetch_size(self, session, id):
        """Return the size reported by the document store for `id`."""
        return self.documentStore.fetch_size(session, id)
277 278 279 # Task API for PVM/MPI/SOAP/etc
class RemoteWriteRecordStore(BdbRecordStore):
    """Listen for records sent by other tasks and write them."""

    def store_data_remote(self, session, data, size):
        """Store `data` under a newly generated id and return that id to the other task."""
        assigned = self.generate_id(session)
        self.store_data(session, assigned, data, size)
        return assigned
288 289
class RemoteSlaveRecordStore(SimpleRecordStore):
    """Record store that forwards serialised records to a remote write task.

    Nothing is stored locally: store_record sends the data to the task set
    up by begin_storing and adopts the id that the task sends back.
    """

    recordStore = ""    # value of the 'remoteStore' path, sent with each request
    writeTask = None    # task object created in begin_storing
    taskType = None     # Task class for the configured protocol
    protocol = ""       # 'PVM' or 'MPI'

    def __init__(self, session, config, parent):
        """Resolve the remote store and select the Task class for the protocol.

        Raises ConfigFileException when the 'remoteStore' path is missing or
        the 'protocol' setting is neither 'PVM' nor 'MPI'.
        """
        SimpleRecordStore.__init__(self, session, config, parent)
        self.writeTask = None
        self.recordStore = self.get_path(session, 'remoteStore')
        if not self.recordStore:
            raise ConfigFileException('Missing recordStore identifier')
        self.protocol = self.get_setting(session, 'protocol')
        # Protocol handlers are imported lazily so only the one actually
        # configured has to be importable.
        if self.protocol == 'PVM':
            from pvmProtocolHandler import Task
            self.taskType = Task
        elif self.protocol == 'MPI':
            from mpiProtocolHandler import Task
            self.taskType = Task
        else:
            raise ConfigFileException('Unknown or missing protocol: %s' % self.protocol)

    def begin_storing(self, session, wt=None):
        """Create the write task from `wt` when one is given; return None."""
        if wt:
            self.writeTask = self.taskType(wt)
        return None

    def create_record(self, session, record=None):
        """Store `record` (or an empty placeholder) under the temporary id "__new".

        store_record replaces that id with the one returned by the write task.
        """
        # Is this actually useful?
        if record is None:
            record = SaxRecord([], "", "__new")
        else:
            record.id = "__new"
        self.store_record(session, record)
        return record

    def store_record(self, session, record):
        """Serialise `record`, send it to the write task and adopt the returned id.

        Raises ValueError when no write task has been set up.
        """
        # Fail before doing any serialisation work if there is nowhere to
        # send the result.
        if self.writeTask is None:
            raise ValueError('WriteTask is None... did you call begin_storing?')

        # str()ify
        if self.inTransformer is not None:
            doc = self.inTransformer.process_record(session, record)
            data = doc.get_raw()
        else:
            # Copy so the pickled hash is not appended to the record's own
            # event list, should get_sax() return that list directly.
            sax = list(record.get_sax())
            sax.append("9 " + cPickle.dumps(record.elementHash))
            data = nonTextToken.join(sax)

        # Now send to task
        size = record.size
        self.writeTask.send([self.recordStore, 'store_data_remote', [session, data, size], {}], 1)
        msg = self.writeTask.recv()
        record.recordStore = self.recordStore
        record.id = msg.data
        return record

    def fetch_record(self, session, record):
        """Not supported by this store."""
        raise NotImplementedError
try:
    from baseStore import SrbStore, SrbBdbCombineStore
except ImportError:
    # The Srb stores are optional: when baseStore does not provide them,
    # this module simply does not define the Srb record stores.
    pass
else:
    class SrbRecordStore(SimpleRecordStore, SrbStore):
        """Record store combining the SimpleRecordStore API with SrbStore."""

        def __init__(self, session, config, parent):
            """Initialise both base classes in turn."""
            SrbStore.__init__(self, session, config, parent)
            SimpleRecordStore.__init__(self, session, config, parent)

    class CachingSrbRecordStore(SimpleRecordStore, SrbBdbCombineStore):
        """Record store combining the SimpleRecordStore API with SrbBdbCombineStore."""
        # Storing/fetching lots of small records is expensive
        # Probably more expensive than finding records in a larger chunk

        def __init__(self, session, config, parent):
            """Initialise both base classes in turn."""
            SrbBdbCombineStore.__init__(self, session, config, parent)
            SimpleRecordStore.__init__(self, session, config, parent)

    class CachingSrbRemoteWriteRecordStore(SimpleRecordStore, SrbBdbCombineStore):
        """SrbBdbCombineStore-backed record store that also accepts remote writes."""
        # Storing/fetching lots of small records is expensive
        # Probably more expensive than finding records in a larger chunk

        def __init__(self, session, config, parent):
            """Initialise both base classes in turn."""
            SrbBdbCombineStore.__init__(self, session, config, parent)
            SimpleRecordStore.__init__(self, session, config, parent)

        def store_data_remote(self, session, data, size):
            """Store `data` under a newly generated id and return that id to the other task."""
            id = self.generate_id(session)
            self.store_data(session, id, data, size)
            return id