Module database
[hide private]
[frames | no frames]

Source Code for Module database

  1   
  2  from configParser import C3Object 
  3  from baseObjects import Database, Index, ProtocolMap, Record 
  4  from c3errors import ConfigFileException 
  5  from bootstrap import BSParser, BootstrapDocument 
  6  from resultSet import SimpleResultSet, BitmapResultSet, ArrayResultSet 
  7  import PyZ3950.CQLParser as cql,  PyZ3950.SRWDiagnostics as SRWDiagnostics 
  8  import os, sys 
  9   
 10   
 11  try: 
 12      # name when installed by hand 
 13      import bsddb3 as bdb 
 14  except: 
 15      # name that comes in python 2.3 
 16      import bsddb as bdb 
 17   
 18   
class SimpleDatabase(Database):
    """Default database implementation.

    Keeps per-database record statistics (counts, sizes and the ranges of
    record ids seen per record store) in a Berkeley DB file located by the
    'metadataPath' path setting, and dispatches indexing, search and scan
    requests to the configured index objects.
    """

    # Key under which the protocol map used by search/scan is registered in
    # self.protocolMaps.
    _SRW_PROTOCOL = 'http://www.loc.gov/zing/srw/'

    # Permission bits (octal 660) for a newly created metadata file.  Built
    # with int() so the value parses on every Python version, unlike the
    # Python-2-only literal 0660.
    _METADATA_FILE_MODE = int('660', 8)

    def __init__(self, session, node, parent):
        """Set up empty caches and statistics, then load stored metadata.

        If the metadata file does not exist, or cannot be read back in a
        usable form, it is (re)initialised via _initialise() and the
        in-memory statistics keep whatever values they had at that point.

        Raises ConfigFileException if no 'metadataPath' is configured.
        """
        self.indexes = {}
        self.protocolMaps = {}
        self.indexConfigs = {}
        self.protocolMapConfigs = {}

        # Maps record store -> list of [first, last] (or [single]) id runs.
        self.records = {}

        self.totalRecords = 0
        self.totalRecordSize = 0
        # Starts high so the first record added always lowers it.
        self.minRecordSize = 10000000
        self.maxRecordSize = 0
        self.meanRecordSize = 0

        Database.__init__(self, session, node, parent)

        mp = self._getMetadataPath()
        if not os.path.exists(mp):
            # We don't exist, try and instantiate new database
            self._initialise(mp)
            return
        try:
            self._loadMetadata(mp)
        except Exception:
            # Doesn't exist in usable form (e.g. keys missing because the
            # file was only ever initialised, or values fail to parse).
            self._initialise(mp)

    def _getMetadataPath(self):
        """Return the metadata file path, resolved against 'defaultPath'.

        Raises ConfigFileException if 'metadataPath' is not set.
        """
        mp = self.get_path(None, 'metadataPath')
        if not mp:
            raise ConfigFileException('No metadata db path set in database configFile')
        if not os.path.isabs(mp):
            dfp = self.get_path(None, 'defaultPath')
            mp = os.path.join(dfp, mp)
        return mp

    def _updateMeanRecordSize(self):
        """Recompute meanRecordSize, yielding 0 when there are no records."""
        if self.totalRecords:
            # Floor division keeps the integer result that '/' produced for
            # integer operands under Python 2.
            self.meanRecordSize = self.totalRecordSize // self.totalRecords
        else:
            self.meanRecordSize = 0

    def _loadMetadata(self, mp):
        """Read the stored statistics from the metadata file at mp.

        The handle is always closed, even when reading fails.  Any error
        (open failure, missing key, unparsable value) propagates to the
        caller.
        """
        cxn = bdb.db.DB()
        try:
            cxn.open(mp)
            recs = cxn.get("records")
            totalRecs = cxn.get("totalRecords")
            totalSize = cxn.get("totalRecordSize")
            minSize = cxn.get("minRecordSize")
            maxSize = cxn.get("maxRecordSize")
        finally:
            cxn.close()
        # int() raises TypeError for a missing key (None), which the caller
        # treats as "not in usable form".
        self.minRecordSize = int(minSize)
        self.maxRecordSize = int(maxSize)
        self.totalRecords = int(totalRecs)
        self.totalRecordSize = int(totalSize)
        # SECURITY NOTE(review): eval() executes whatever text is stored
        # under "records".  That text is written by commit_metadata() via
        # repr(), but anyone able to modify the metadata file can run
        # arbitrary code here.  Consider a literal-only parser.
        self.records = eval(recs)
        self._updateMeanRecordSize()

    def _initialise(self, mp):
        """Create (if necessary) the BTree metadata file at mp."""
        cxn = bdb.db.DB()
        try:
            cxn.open(mp, dbtype=bdb.db.DB_BTREE, flags=bdb.db.DB_CREATE,
                     mode=self._METADATA_FILE_MODE)
        finally:
            cxn.close()

    def _cacheIndexes(self, session):
        """Populate self.indexes with the indexes stored in our index stores.

        The acceptable stores come from 'indexStoreList' (space separated
        ids) or, failing that, the single 'indexStore' path object.  An
        index is cached when its configuration has a <paths> element with
        an <object> child whose 'ref' attribute names one of those stores.

        Raises ConfigFileException if neither setting is present.
        """
        storeList = self.get_path(session, 'indexStoreList')
        if not storeList:
            indexStore = self.get_path(session, 'indexStore')
            if not indexStore:
                raise ConfigFileException("No indexStore/indexStoreList associated with database: %s" % self.id)
            storeList = [indexStore.id]
        else:
            storeList = storeList.split(' ')
        for (indexId, dom) in self.indexConfigs.items():
            # see if index should be built
            for c in dom.childNodes:
                # nodeType 1 is an element node in the DOM API.
                if c.nodeType != 1 or c.localName != 'paths':
                    continue
                for c2 in c.childNodes:
                    if c2.nodeType != 1 or c2.localName != 'object':
                        continue
                    istore = c2.getAttributeNS(None, 'ref')
                    if istore in storeList:
                        self.indexes[indexId] = self.get_object(session, indexId)

    def _cacheProtocolMaps(self, session):
        """Instantiate every configured protocol map, keyed by its protocol."""
        for mapId in self.protocolMapConfigs.keys():
            pm = self.get_object(session, mapId)
            self.protocolMaps[pm.protocol] = pm

    def _getProtocolMap(self, session):
        """Return the protocol map used to resolve indexes for a query.

        Uses the 'protocolMap' path if set; otherwise caches all protocol
        maps, picks the one registered for _SRW_PROTOCOL and remembers it
        in self.paths.

        Raises ConfigFileException if no such map is available, instead of
        failing later with an AttributeError on None.
        """
        pm = self.get_path(session, 'protocolMap')
        if not pm:
            self._cacheProtocolMaps(session)
            pm = self.protocolMaps.get(self._SRW_PROTOCOL)
            if pm is None:
                raise ConfigFileException("No protocolMap for %s associated with database: %s" % (self._SRW_PROTOCOL, self.id))
            self.paths['protocolMap'] = pm
        return pm

    def commit_metadata(self, session):
        """Write the current statistics and record id ranges to disk.

        Raises ConfigFileException if 'metadataPath' is not configured and
        ValueError if the metadata file cannot be opened or written.
        """
        mp = self._getMetadataPath()
        self._updateMeanRecordSize()
        cxn = bdb.db.DB()
        try:
            try:
                cxn.open(mp)
                cxn.put("totalRecords", str(self.totalRecords))
                cxn.put("totalRecordSize", str(self.totalRecordSize))
                cxn.put("minRecordSize", str(self.minRecordSize))
                cxn.put("maxRecordSize", str(self.maxRecordSize))
                cxn.put("records", repr(self.records))
            finally:
                cxn.close()
        except Exception:
            # sys.exc_info() rather than "except ... as" keeps this
            # parseable on old and new interpreters alike.
            err = sys.exc_info()[1]
            raise ValueError("Could not write metadata to %s: %s" % (mp, err))

    def add_record(self, session, record=None):
        """Count a record and fold its id and size into the statistics.

        totalRecords is incremented even when record is None/false; the id
        ranges and size statistics are only updated for a real record.
        Returns the record that was passed in.
        """
        self.totalRecords += 1
        if record:
            (storeid, recordId) = (record.recordStore, record.id)

            # Best effort only: ids that do not support "id - 1" (or a
            # malformed range list) must not stop the record being counted.
            try:
                full = self.records.get(storeid, [[]])
                k = full[-1]
                if len(k) > 1 and k[1] == recordId - 1:
                    # Extends the current [first, last] run.
                    k[1] = recordId
                elif (len(k) == 1 and k[0] == recordId - 1) or not k:
                    # Starts a run, or turns [first] into [first, last].
                    k.append(recordId)
                else:
                    # Not contiguous with the last run: open a new one.
                    full.append([recordId])
                self.records[storeid] = full
            except Exception:
                pass

            # And record size
            self.totalRecordSize += record.size
            if record.size > self.maxRecordSize:
                self.maxRecordSize = record.size
            if record.size < self.minRecordSize:
                self.minRecordSize = record.size
        return record

    def index_record(self, session, record):
        """Pass the record to every cached index; returns the record."""
        if not self.indexes:
            self._cacheIndexes(session)
        for idx in self.indexes.values():
            idx.index_record(session, record)
        return record

    def remove_record(self, session, record):
        """Remove a record's contribution to the count and total size.

        NOTE(review): self.records (the id ranges) and the min/max sizes
        are not adjusted here.
        """
        self.totalRecords -= 1
        if record.size:
            self.totalRecordSize -= record.size

    def unindex_record(self, session, record):
        """Ask every cached index to delete the record."""
        if not self.indexes:
            self._cacheIndexes(session)
        for idx in self.indexes.values():
            idx.delete_record(session, record)
        return None

    def begin_indexing(self, session):
        """Call begin_indexing() on every cached index."""
        if not self.indexes:
            self._cacheIndexes(session)
        for idx in self.indexes.values():
            idx.begin_indexing(session)
        return None

    def commit_indexing(self, session):
        """Call commit_indexing() on every index already cached."""
        for idx in self.indexes.values():
            idx.commit_indexing(session)
        return None

    def _search(self, session, query):
        """Recursively evaluate a CQL query tree and return a result set.

        A search clause is answered from a stored result set (when it names
        one) or by the index the protocol map resolves it to.  Anything
        else is treated as a boolean node: both operands are evaluated and
        combined.

        Raises SRWDiagnostics.Diagnostic16 when no index matches a clause.
        """
        if isinstance(query, cql.SearchClause):
            # Check resultset
            rsid = query.getResultSetId()
            if rsid:
                # Get existing result set
                rss = self.get_object(session, "defaultResultSetStore")
                return rss.fetch_resultSet(session, rsid)

            pm = self._getProtocolMap(session)
            idx = pm.resolveIndex(session, query)
            if idx is None:
                d = SRWDiagnostics.Diagnostic16()
                d.details = query.index.toCQL()
                raise d
            query.config = pm
            try:
                return idx.search(session, query, self)
            finally:
                # Always detach the map again, even if the search fails.
                query.config = None

        left = self._search(session, query.leftOperand)
        right = self._search(session, query.rightOperand)
        if left.__class__ == right.__class__:
            new = left.__class__(session, [], recordStore=left.recordStore)
        elif left.__class__ == BitmapResultSet:
            # Want to switch the left/right, but rset assumes list[0] is same type
            # NOTE(review): this swaps operand order for the combine call;
            # confirm that is safe for non-commutative booleans.
            new = right.__class__(session, [], recordStore=right.recordStore)
            return new.combine(session, [right, left], query, self)
        elif right.__class__ == BitmapResultSet:
            new = left.__class__(session, [], recordStore=left.recordStore)
        else:
            new = SimpleResultSet(session, [])
        return new.combine(session, [left, right], query, self)

    def search(self, session, query):
        """Run the query and apply top-level ordering to the result set.

        Result sets flagged as relevancy-ranked have their weights scaled
        and are ordered by "weight"; others are returned as produced.
        """
        rs = self._search(session, query)
        # now do top level stuff, like sort
        if rs.relevancy:
            rs.scale_weights()
            rs.order(session, "weight")
        else:
            # check query for sort
            pass
        return rs

    def scan(self, session, query, numReq, direction=">="):
        """Scan the index resolved for a single search clause.

        Raises SRWDiagnostics.Diagnostic38 if query is not a search clause
        and SRWDiagnostics.Diagnostic16 if no index matches it.
        """
        if not isinstance(query, cql.SearchClause):
            d = SRWDiagnostics.Diagnostic38()
            d.details = "Cannot use boolean in scan"
            raise d
        pm = self._getProtocolMap(session)
        idx = pm.resolveIndex(session, query)
        if idx is None:
            d = SRWDiagnostics.Diagnostic16()
            d.details = query.index.toCQL()
            raise d
        return idx.scan(session, query.term.value, numReq, direction)

    def sort(self, session, sets, keys):
        """Placeholder; not implemented."""
        # Needed for Z sorts by index
        pass
249