Module littletable
[frames] | no frames]

Source Code for Module littletable

   1  # 
   2  # littletable.py 
   3  #  
   4  # littletable is a simple in-memory database for ad-hoc or user-defined objects, 
   5  # supporting simple query and join operations - useful for ORM-like access 
   6  # to a collection of data objects, without dealing with SQL 
   7  # 
   8  # 
   9  # Copyright (c) 2010  Paul T. McGuire 
  10  # 
  11  # Permission is hereby granted, free of charge, to any person obtaining 
  12  # a copy of this software and associated documentation files (the 
  13  # "Software"), to deal in the Software without restriction, including 
  14  # without limitation the rights to use, copy, modify, merge, publish, 
  15  # distribute, sublicense, and/or sell copies of the Software, and to 
  16  # permit persons to whom the Software is furnished to do so, subject to 
  17  # the following conditions: 
  18  # 
  19  # The above copyright notice and this permission notice shall be 
  20  # included in all copies or substantial portions of the Software. 
  21  # 
  22  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  23  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  24  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  25  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  26  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  27  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  28  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  29  # 
  30   
  31  __doc__ = """\ 
  32   
  33  C{littletable} - a Python module to give ORM-like access to a collection of objects 
  34   
  35  The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a  
  36  collection of user objects.  C{littletable} provides a L{DataObject} class for ad hoc creation 
  37  of semi-immutable objects that can be stored in a C{littletable} L{Table}. 
  38   
  39  In addition to basic ORM-style insert/remove/query/delete access to the contents of a  
  40  Table, C{littletable} offers: 
  41   - simple indexing for improved retrieval performance, and optional enforcing key uniqueness 
  42   - access to objects using indexed attributes 
  43   - simplified joins using '+' operator syntax between annotated Tables 
  44   - the result of any query or join is a new first-class C{littletable} Table 
  45   
  46  C{littletable} Tables do not require an upfront schema definition, but simply work off of the 
  47  attributes in the stored values, and those referenced in any query parameters. 
  48   
  49  Here is a simple C{littletable} data storage/retrieval example:: 
  50   
  51      from littletable import Table, DataObject 
  52   
  53      customers = Table('customers') 
  54      customers.create_index("id", unique=True) 
  55      customers.insert(DataObject(id="0010", name="George Jetson")) 
  56      customers.insert(DataObject(id="0020", name="Wile E. Coyote")) 
  57      customers.insert(DataObject(id="0030", name="Jonny Quest")) 
  58   
  59      catalog = Table('catalog') 
  60      catalog.create_index("sku", unique=True) 
  61      catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100)) 
  62      catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3)) 
  63      catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8)) 
  64      catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12)) 
  65   
  66      wishitems = Table('wishitems') 
  67      wishitems.create_index("custid") 
  68      wishitems.create_index("sku") 
  69      wishitems.insert(DataObject(custid="0020", sku="ANVIL-001")) 
  70      wishitems.insert(DataObject(custid="0020", sku="BRDSD-001")) 
  71      wishitems.insert(DataObject(custid="0020", sku="MAGNT-001")) 
  72      wishitems.insert(DataObject(custid="0030", sku="MAGNT-001")) 
  73      wishitems.insert(DataObject(custid="0030", sku="MAGLS-001")) 
  74   
  75      # print a particular customer name  
  76      # (unique indexes will return a single item; non-unique 
  77      # indexes will return a list of all matching items) 
  78      print customers.id["0030"].name 
  79   
  80      # print all items sold by the pound 
  81      for item in catalog.query(unitofmeas="LB"): 
  82          print item.sku, item.descr 
  83   
  84      # print all items that cost more than 10 
  85      for item in catalog.where(lambda o : o.unitprice>10): 
  86          print item.sku, item.descr, item.unitprice 
  87   
  88      # join tables to create queryable wishlists collection 
  89      wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku") 
  90   
  91      # print all wishlist items with price > 10 
  92      bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10) 
  93      for item in bigticketitems: 
  94          print item 
  95   
  96      # list all wishlist items in descending order by price 
   97      for item in wishlists().query(_orderby="unitprice desc"): 
  98          print item 
  99  """ 
 100   
 101  __version__ = "0.3" 
 102  __versionTime__ = "24 Oct 2010 21:00" 
 103  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
 104   
 105  import sys 
 106  from collections import defaultdict 
 107  from itertools import groupby,ifilter,islice,starmap,repeat 
 108  import csv 
 109   
 110  try: 
 111      from itertools import product 
 112  except ImportError: 
113 - def product(aseq,bseq):
114 for a in aseq: 115 for b in bseq: 116 yield a,b
# Compatibility shim: where the builtin ``basestring`` does not exist the
# lookup raises NameError, and ``basestring`` is then aliased to ``str`` so
# that the isinstance(..., basestring) checks in this module keep working.
try:
    t = basestring
except NameError:
    basestring = str

# names exported by ``from littletable import *``
__all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"]
def _object_attrnames(obj):
    """Return the attribute names carried by C{obj}.

    Three kinds of object are recognized, checked in this order:
     - objects with an instance C{__dict__}: the dict's keys
     - namedtuples (tuples having C{_fields}): the C{_fields} value
     - objects defining C{__slots__}: the C{__slots__} value
    @param obj: object to inspect
    @returns: the attribute names, in whatever container the source provides
    @raise ValueError: if C{obj} matches none of the cases above
    """
    # ordinary instance - names live in the instance dictionary
    if hasattr(obj, "__dict__"):
        return vars(obj).keys()

    # namedtuple
    is_namedtuple = isinstance(obj, tuple) and hasattr(obj, "_fields")
    if is_namedtuple:
        return obj._fields

    if hasattr(obj, "__slots__"):
        return obj.__slots__

    raise ValueError("object with unknown attributes")
136
class DataObject(object):
    """A generic write-once record for storing data values in a table.

    Attributes are supplied as keyword arguments to the constructor, or added
    later with C{object.attribute = value}.  An attribute can be added at any
    time, but assigning to a name that already exists is silently ignored, so
    the first value given for an attribute is the one that is kept.  Table
    joins return their results as a Table of DataObjects.
    """

    def __init__(self, **kwargs):
        # write directly into the instance dict; this bypasses __setattr__,
        # which is fine because a new object has no attributes to protect
        self.__dict__.update(kwargs)

    def __repr__(self):
        """Show the object as the repr of its attribute dictionary."""
        return repr(self.__dict__)

    def __setattr__(self, attr, val):
        """Set C{attr} only if it has not been set before (write-once)."""
        if attr in self.__dict__:
            return
        super(DataObject, self).__setattr__(attr, val)

    def __getitem__(self, k):
        """Dict-style read access to an attribute.
        @raise KeyError: if the object has no attribute named C{k}
        """
        if not hasattr(self, k):
            raise KeyError("object has no such attribute " + k)
        return getattr(self, k)
156
class _ObjIndex(object):
    """Non-unique index: maps each value of one attribute to the list of
    objects that were stored under that value.
    """

    def __init__(self, attr):
        """
        @param attr: name of the attribute this index is keyed on
        @type attr: string
        """
        self.attr = attr
        self.obs = defaultdict(list)    # key value -> list of objects
        self.is_unique = False

    def __setitem__(self, k, v):
        """Add object C{v} to the list stored under key C{k}."""
        self.obs[k].append(v)

    def __getitem__(self, k):
        """Return the list of objects stored under C{k} (empty if none)."""
        # .get() rather than [] so that a lookup never creates a new bucket
        return self.obs.get(k, [])

    def __len__(self):
        """Return the number of distinct keys in the index."""
        return len(self.obs)

    def __iter__(self):
        return iter(self.obs)

    def keys(self):
        """Return the index keys as a sorted list."""
        return sorted(self.obs.keys())

    def items(self):
        """Return the C{(key, list-of-objects)} pairs of the index."""
        return self.obs.items()

    def remove(self, obj):
        """Remove one occurrence of C{obj} from the index.  Nothing happens,
        and no exception is raised, if C{obj} is not in the index.

        A key whose last object is removed is dropped from the index, so that
        C{key in index}, C{len(index)} and C{keys()} only report keys that
        still have objects.
        """
        # an object without the attribute is looked up under None, which is
        # the key Table.create_index files such objects under
        k = getattr(obj, self.attr, None)
        candidates = [k]
        if not k and k is not None:
            # Table.create_index stores blank values ('', 0, ...) under None,
            # so fall back to that bucket as well
            candidates.append(None)
        for key in candidates:
            bucket = self.obs.get(key)
            if bucket is None:
                continue
            try:
                bucket.remove(obj)
            except ValueError:
                continue
            if not bucket:
                del self.obs[key]
            return

    def __contains__(self, key):
        return key in self.obs

    def copy_template(self):
        """Return a new, empty index of the same class on the same attribute."""
        return self.__class__(self.attr)
184
class _UniqueObjIndex(_ObjIndex):
    """Unique index: maps each key value to exactly one object.

    Any key that tests false (C{None}, C{''}, C{0}, ...) is treated as
    "no key"; objects with such keys are kept together in C{none_values}
    instead of in the key dictionary.
    """

    def __init__(self, attr, accept_none=False):
        """
        @param attr: name of the attribute this index is keyed on
        @type attr: string
        @param accept_none: whether objects without a key value are reported
            as members by C{in}
        @type accept_none: boolean
        """
        self.attr = attr
        self.obs = {}                   # key value -> single object
        self.is_unique = True
        self.accept_none = accept_none
        self.none_values = set()        # objects whose key tests false

    def __setitem__(self, k, v):
        """Store C{v} under key C{k}.
        @raise KeyError: if C{k} is already present in the index
        """
        if k:
            if k in self.obs:
                raise KeyError("duplicate key value %s" % k)
            self.obs[k] = v
        else:
            self.none_values.add(v)

    def __getitem__(self, k):
        """Return a list holding the object stored under C{k} (empty if there
        is none); for a false key, return a list of all key-less objects.
        """
        if k:
            return [self.obs.get(k)] if k in self.obs else []
        else:
            return list(self.none_values)

    def __contains__(self, k):
        if k:
            return k in self.obs
        # always answer with a real boolean, also when called directly
        return bool(self.accept_none and self.none_values)

    def keys(self):
        """Return the sorted keys, followed by C{None} if any key-less
        objects are held."""
        return sorted(self.obs.keys()) + ([None,] if self.none_values else [])

    def items(self):
        """Return C{(key, [object])} pairs, shaped like L{_ObjIndex.items};
        key-less objects are not included."""
        return [(k,[v]) for k,v in self.obs.items()]

    def remove(self, obj):
        """Remove C{obj} from the index.  Nothing happens if C{obj} is not the
        object stored under its key, or if it lacks the indexed attribute.
        """
        k = getattr(obj, self.attr, None)
        if k:
            # only drop the entry if it really belongs to obj; a different
            # object that merely shares the key value must not evict it
            if k in self.obs:
                stored = self.obs[k]
                if stored is obj or stored == obj:
                    del self.obs[k]
        else:
            self.none_values.discard(obj)

    def copy_template(self):
        """Return a new, empty unique index on the same attribute, keeping
        the C{accept_none} setting of this index."""
        return self.__class__(self.attr, self.accept_none)
221
class _ObjIndexWrapper(object):
    """Wrapper handed out for a non-unique index; indexing it by key returns
    the matching objects as a new Table.  Every other attribute is forwarded
    to the wrapped index.
    """

    def __init__(self, ind):
        """
        @param ind: the index object to wrap
        """
        self._index = ind

    def __getattr__(self, attr):
        # __getattr__ is only called when normal lookup fails.  If _index
        # itself is missing (an instance created without __init__, as
        # copy.copy does), forwarding would re-enter this method forever,
        # so report the attribute as absent instead.
        if attr == "_index":
            raise AttributeError(attr)
        return getattr(self._index, attr)

    def __getitem__(self, k):
        """Return a new Table of the objects stored under key C{k}; the Table
        is empty if the key is not in the index."""
        ret = Table()
        if k in self._index:
            ret.insert_many(self._index[k])
        return ret

    def __contains__(self, k):
        return k in self._index
234
class _UniqueObjIndexWrapper(object):
    """Wrapper handed out for a unique index; indexing it by key returns the
    single matching object.  Every other attribute is forwarded to the
    wrapped index.
    """

    def __init__(self, ind):
        """
        @param ind: the index object to wrap
        """
        self._index = ind

    def __getattr__(self, attr):
        # guard against endless recursion when _index has not been set yet
        # (an instance created without __init__, as copy.copy does)
        if attr == "_index":
            raise AttributeError(attr)
        return getattr(self._index, attr)

    def __contains__(self, k):
        return k in self._index

    def __getitem__(self, k):
        """Return the object stored under key C{k}.  For a key that tests
        false, return a Table of the objects the index holds for that key
        (empty if the index does not report the key as present).
        @raise KeyError: if C{k} is a true value with no object in the index
        """
        if k:
            matches = self._index[k]
            if not matches:
                # the index answers with an empty list for unknown keys;
                # report that as a missing key, not as an IndexError
                raise KeyError("no object with key value %r" % (k,))
            return matches[0]
        else:
            ret = Table()
            if k in self._index:
                ret.insert_many(self._index[k])
            return ret
250 251
class Table(object):
    """Table is the main class in C{littletable}, for representing a collection of DataObjects or
    user-defined objects with publicly accessible attributes or properties.  Tables can be:
     - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>}
     - indexed, with multiple indexes, with unique or non-unique values, see L{create_index}
     - queried, specifying values to exact match in the desired records, see L{query}
     - filtered (using L{where}), using a simple predicate function to match desired records;
       useful for selecting using inequalities or compound conditions
     - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__}
     - joined, using L{join_on} to identify attribute to be used for joining with another table, and
       L{join} or operator '+' to perform the actual join
     - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects
       by attribute values
     - imported/exported to CSV-format files
    Queries and joins return their results as new Table objects, so that queries and joins can
    be easily performed as a succession of operations.
    """

    def __init__(self, table_name=''):
        """Create a new, empty Table.
        @param table_name: name for Table
        @type table_name: string (optional)
        """
        self.table_name = table_name
        self.obs = []           # stored objects, in insertion order
        self._indexes = {}      # attribute name -> index object

    def __len__(self):
        """Return the number of objects in the Table."""
        return len(self.obs)

    def __iter__(self):
        """Create an iterator over the objects in the Table."""
        return iter(self.obs)

    def __getitem__(self, i):
        """Provides direct indexed/sliced access to the Table's underlying list of objects."""
        return self.obs[i]

    def __getattr__(self, attr):
        """A quick way to query for matching records using their indexed attributes.  The attribute
        name is used to locate the index, and returns a wrapper on the index.  This wrapper provides
        dict-like access to the underlying records in the table, as in::

            employees.socsecnum["000-00-0000"]
            customers.zipcode["12345"]

        The behavior differs slightly for unique and non-unique indexes:
          - if the index is unique, then retrieving a matching object, will return just the object;
            if there is no matching object, C{KeyError} is raised
          - if the index is non-unique, then all matching objects will be returned in a new Table,
            just as if a regular query had been performed; if no objects match the key value, an empty
            Table is returned and no exception is raised.

        If there is no index defined for the given attribute, then C{AttributeError} is raised.
        """
        # Go through __dict__ instead of self._indexes / self.table_name: on an
        # instance created without __init__ (as copy.copy does) those names are
        # missing, and a plain attribute access would re-enter this method
        # without end.
        indexes = self.__dict__.get("_indexes", {})
        if attr in indexes:
            ind = indexes[attr]
            if isinstance(ind, _UniqueObjIndex):
                return _UniqueObjIndexWrapper(ind)
            if isinstance(ind, _ObjIndex):
                return _ObjIndexWrapper(ind)
            return ind
        raise AttributeError("Table '%s' has no index '%s'" %
                                (self.__dict__.get("table_name", ""), attr))

    def __bool__(self):
        """A Table is true when it holds at least one object."""
        return bool(self.obs)

    __nonzero__ = __bool__

    def __call__(self, table_name):
        """A simple way to assign a name to a table, such as those
        dynamically created by joins and queries.
        @param table_name: name for Table
        @type table_name: string
        @returns: this Table, so the call can be chained
        """
        self.table_name = table_name
        return self

    def copy_template(self):
        """Create empty copy of the current table, with copies of all
        index definitions.
        """
        ret = Table(self.table_name)
        for k, v in self._indexes.items():
            ret._indexes[k] = v.copy_template()
        return ret

    def clone(self):
        """Create full copy of the current table, including table contents
        and index definitions.
        """
        ret = self.copy_template()
        ret.insert_many(self.obs)
        return ret

    def create_index(self, attr, unique=False, accept_none=False):
        """Create a new index on a given attribute.
        If C{unique} is True and records are found in the table with duplicate
        attribute values, the index is not created and C{KeyError} is raised.

        If the table already has an index on the given attribute, then no
        action is taken and no exception is raised.

        Objects that lack the attribute are indexed under C{None}.
        @param attr: the attribute to be used for indexed access and joins
        @type attr: string
        @param unique: flag indicating whether the indexed field values are
            expected to be unique across table entries
        @type unique: boolean
        @param accept_none: flag indicating whether None is an acceptable
            value for this attribute
        @type accept_none: boolean
        @raise KeyError: for a duplicate key in a unique index, or for a
            missing/blank key when C{accept_none} is False
        """
        if attr in self._indexes:
            return

        if unique:
            ind = _UniqueObjIndex(attr, accept_none)
        else:
            ind = _ObjIndex(attr)
            # a non-unique index places no restriction on missing keys
            accept_none = True

        for obj in self.obs:
            # keep the attribute value as-is, so that existing rows are keyed
            # exactly like rows added later through insert()
            obval = getattr(obj, attr, None)
            if not (obval or accept_none):
                raise KeyError("None is not an allowed key")
            ind[obval] = obj    # a unique index raises KeyError on a duplicate

        # register the index only once it has been built completely
        self._indexes[attr] = ind

    def delete_index(self, attr):
        """Deletes an index from the Table.  Can be used to drop and rebuild an index,
        or to convert a non-unique index to a unique index, or vice versa.
        No action is taken if there is no index on C{attr}.
        @param attr: name of an indexed attribute
        @type attr: string
        """
        self._indexes.pop(attr, None)

    def insert(self, obj):
        """Insert a new object into this Table.
        @param obj: any Python object
        Objects can be constructed using the defined DataObject type, or they can
        be any Python object whose index attributes can be read with C{getattr}.
        An object that lacks an indexed attribute is indexed under C{None}.

        If the table contains a unique index, and the record to be inserted would add
        a duplicate value for the indexed attribute, then C{KeyError} is raised, and the
        object is not inserted.  C{KeyError} is also raised if the value for a unique
        index is C{None} and that index was not created with C{accept_none}.

        If the table has no unique indexes, then it is possible to insert duplicate
        objects into the table.
        """
        # work out and validate every index key before changing anything, so
        # that a rejected object leaves the table and its indexes untouched
        keyed = []
        for attr, ind in self._indexes.items():
            key = getattr(obj, attr, None)
            if ind.is_unique:
                if key is None and not ind.accept_none:
                    raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj)
                # only true-valued keys can collide; blank keys are pooled by
                # the index, and create_index accepts any number of them
                if key and key in ind:
                    raise KeyError("duplicate unique key value '%s' for index %s" % (key, ind.attr), obj)
            keyed.append((ind, key))

        self.obs.append(obj)
        for ind, key in keyed:
            ind[key] = obj

    def insert_many(self, it):
        """Inserts a collection of objects into the table."""
        for ob in it:
            self.insert(ob)

    def remove(self, ob):
        """Removes an object from the table. If object is not in the table, then
        no action is taken and no exception is raised."""
        # remove from main object list first; if it is not there, the indexes
        # must be left alone
        try:
            self.obs.remove(ob)
        except ValueError:
            return

        # remove from indexes
        for ind in self._indexes.values():
            ind.remove(ob)

    def remove_many(self, it):
        """Removes a collection of objects from the table."""
        for ob in it:
            self.remove(ob)

    def _query_attr_sort_fn(self, attr_val):
        """Sort key used by L{query} to order its criteria: the number of
        objects an index holds for the given value (0 if the value is not in
        the index), or a very large number for an attribute with no index.
        @param attr_val: C{(attribute name, value)} pair
        """
        attr, val = attr_val
        if attr not in self._indexes:
            return 1e9
        idx = self._indexes[attr]
        if val in idx:
            return len(idx[val])
        return 0

    def query(self, **kwargs):
        """Retrieves matching objects from the table, based on given
        named parameters.  If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be returned.
        With no selection criteria, a copy of the whole table is returned.

        Special kwargs (any name starting with "_" is taken as a flag and not
        as an attribute to match; flags other than those listed are ignored):
         - C{_orderby="attr,..."} - resulting table should sort content objects
           by the C{attr}s given in a comma-separated string; to sort in
           descending order, reference the attribute as C{attr desc}.

        @param **kwargs: attributes for selecting records, given as additional
            named arguments of the form C{attrname="attrvalue"}.
        @return: a new Table containing the matching objects
        """
        # separate meta keys from selection criteria
        flags = dict((k, v) for k, v in kwargs.items() if k.startswith("_"))
        criteria = [(k, v) for k, v in kwargs.items() if not k.startswith("_")]

        if criteria:
            # order query criteria in ascending order of number of matching items
            # for each individual given attribute; this will minimize the number
            # of filtering records that each subsequent attribute will have to
            # handle
            if len(criteria) > 1 and len(self.obs) > 100:
                criteria.sort(key=self._query_attr_sort_fn)

            ret = None
            for attr, val in criteria:
                # first criterion selects from this table, later ones narrow
                # down the result of the previous step
                source = self if ret is None else ret
                matches = source.copy_template()
                if attr in source._indexes:
                    matches.insert_many(source._indexes[attr][val])
                else:
                    matches.insert_many(ob for ob in source.obs
                                        if hasattr(ob, attr) and getattr(ob, attr) == val)
                ret = matches
        else:
            ret = self.clone()

        if "_orderby" in flags:
            attrs = [s.strip() for s in flags["_orderby"].split(',')]
            # "attr" -> (attr, 'asc'); "attr desc" -> (attr, 'desc')
            attr_orders = [(a.split() + ['asc'])[:2] for a in attrs if a]
            # sort by the least significant attribute first; list.sort is
            # stable, so the later sorts keep the earlier ordering for ties
            for attr, order in attr_orders[::-1]:
                ret.obs.sort(key=lambda ob, attr=attr: getattr(ob, attr),
                             reverse=(order == "desc"))

        return ret

    def delete(self, **kwargs):
        """Deletes matching objects from the table, based on given
        named parameters.  If multiple named parameters are given, then
        only objects that satisfy all of the query criteria will be removed.
        If no selection criteria are given, nothing is deleted.
        @param **kwargs: attributes for selecting records, given as additional
            named arguments of the form C{attrname="attrvalue"}.
        @return: the number of objects removed from the table
        """
        # query() treats "_"-prefixed names as flags and returns the whole
        # table when only flags are given - never delete everything for that
        if not [k for k in kwargs if not k.startswith("_")]:
            return 0

        affected = self.query(**kwargs)
        self.remove_many(affected)
        return len(affected)

    def where(self, wherefn, maxrecs=0):
        """An alternative to L{query}, using a matching predicate function to
        determine whether a given object matches the query or not.  You must use
        C{where} in place of C{query} if you want to query using inequalities or more
        complex matching criteria than simple C{attribute=value}.
        @param wherefn: a method or lambda that returns a boolean result, as in::

            lambda ob : ob.unitprice > 10

        @type wherefn: callable(object) returning boolean
        @param maxrecs: if only the first 'n' records are needed, then C{where} will
            stop after locating 'n' matching records
        @type maxrecs: int
        @returns: a new Table containing the matching records
        """
        ret = self.copy_template()
        matches = (ob for ob in self.obs if wherefn(ob))
        if maxrecs:
            matches = islice(matches, 0, maxrecs)
        ret.insert_many(matches)
        return ret

    def join(self, other, attrlist=None, **kwargs):
        """
        Join the objects of one table with the objects of another, based on the given
        matching attributes in the named arguments.  The attrlist specifies the attributes to
        be copied from the source tables - if omitted, all attributes will be copied.  Entries
        in the attrlist may be single attribute names, or if there are duplicate names in both
        tables, then a C{(table,attributename)} tuple can be given to disambiguate which
        attribute is desired.  A C{(table,attributename,alias)} tuple can also be passed, to
        rename an attribute from a source table.  Names in the attrlist that are found in
        neither table are skipped.

        This method may be called directly, or can be constructed using the L{join_on} method and
        the '+' operator.  Using this syntax, the join is specified using C{table.join_on("xyz")}
        to create a JoinTerm containing both table and joining attribute.  Multiple JoinTerm
        or tables can be added to construct a compound join expression.  When complete, the
        join expression gets executed by calling the resulting join definition,
        using C{join_expression([attrlist])}.

        @param other: other table to join to
        @param attrlist: list of attributes to be copied to the new joined table; if
            none provided, all attributes of both tables will be used (taken from the first
            object in each table)
        @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples
            (list may contain both strings and tuples)
        @param **kwargs: attribute to join on, given as an additional named argument
            of the form C{table1attr="table2attr"}; only one pair is used.
        @returns: a new Table containing the joined data as new DataObjects
        @raise ValueError: if no join attribute is given, or if either join
            attribute is not indexed
        """
        if not kwargs:
            raise ValueError("must specify join attributes as thisattr='otherattr'")
        thiscol, othercol = list(kwargs.items())[0]

        retname = ("(%s:%s^%s:%s)" %
                (self.table_name, thiscol, other.table_name, othercol))
        # make sure both tables contain records to join - if not, just return empty list
        if not (self.obs and other.obs):
            return Table(retname)

        if isinstance(attrlist, basestring):
            attrlist = attrlist.split()

        # expand attrlist to full (table, name, alias) tuples
        thisnames = set(_object_attrnames(self.obs[0]))
        othernames = set(_object_attrnames(other.obs[0]))
        fullcols = []
        if attrlist is not None:
            for col in attrlist:
                if isinstance(col, tuple):
                    # assume col contains at least (table, colname), fill in alias if missing
                    # to be same as colname
                    fullcols.append((col + (col[1],))[:3])
                elif col in thisnames:
                    fullcols.append((self, col, col))
                elif col in othernames:
                    fullcols.append((other, col, col))
        else:
            fullcols = [(self, n, n) for n in thisnames]
            fullcols += [(other, n, n) for n in othernames]

        thiscols = [c for c in fullcols if c[0] is self]
        othercols = [c for c in fullcols if c[0] is other]

        # compare against None explicitly: an index object defines __len__,
        # so an existing index with no keys would otherwise test as false
        thiscolindex = self._indexes.get(thiscol)
        othercolindex = other._indexes.get(othercol)
        if thiscolindex is None or othercolindex is None:
            raise ValueError("can only join on indexed attributes")

        # use table with fewer keys to drive join
        if len(thiscolindex) < len(othercolindex):
            shortindex, longindex = (thiscolindex, othercolindex)
            swap = False
        else:
            shortindex, longindex = (othercolindex, thiscolindex)
            swap = True

        # find matching rows, always as (rows of this table, rows of other table)
        matchingrows = []
        for key, rows in shortindex.items():
            if key in longindex:
                if swap:
                    matchingrows.append((longindex[key], rows))
                else:
                    matchingrows.append((rows, longindex[key]))

        joinrows = []
        for thisrows, otherrows in matchingrows:
            for trow, orow in product(thisrows, otherrows):
                retobj = DataObject()
                # DataObject attributes are write-once, so for an alias used
                # by both tables the value from this table is the one kept
                for _, c, a in thiscols:
                    setattr(retobj, a, getattr(trow, c))
                for _, c, a in othercols:
                    setattr(retobj, a, getattr(orow, c))
                joinrows.append(retobj)

        ret = Table(retname)
        for tbl, collist in zip([self, other], [thiscols, othercols]):
            for _, c, a in collist:
                if c in tbl._indexes:
                    ret.create_index(a) # no unique indexes in join results
        ret.insert_many(joinrows)
        return ret

    def join_on(self, attr):
        """Creates a JoinTerm in preparation for joining with another table, to
        indicate what attribute should be used in the join.  Only indexed attributes
        may be used in a join.
        @param attr: attribute name to join from this table (may be different
            from the attribute name in the table being joined to)
        @type attr: string
        @returns: L{JoinTerm}
        @raise ValueError: if C{attr} is not indexed"""
        if attr not in self._indexes:
            raise ValueError("can only join on indexed attributes")
        return JoinTerm(self, attr)

    def pivot(self, attrlist):
        """Pivots the data using the given attributes, returning a L{PivotTable}.
        @param attrlist: list of attributes to be used to construct the pivot table
        @type attrlist: list of strings, or string of space-delimited attribute names
        @raise ValueError: if any of the attributes is not indexed
        """
        if isinstance(attrlist, basestring):
            attrlist = attrlist.split()
        if not all(a in self._indexes for a in attrlist):
            raise ValueError("pivot can only be called using indexed attributes")
        return PivotTable(self, [], attrlist)

    def csv_import(self, csv_source):
        """Imports the contents of a CSV-formatted file into this table.
        Each row is inserted as a L{DataObject} built from the row's columns.
        @param csv_source: CSV file - if a string is given, the file with that name will be
            opened, read, and closed; if a file object is given, then that object
            will be read as-is, and left for the caller to be closed.
        @type csv_source: string or file
        """
        close_on_exit = False
        if isinstance(csv_source, basestring):
            csv_source = open(csv_source)
            close_on_exit = True
        try:
            csvdata = csv.DictReader(csv_source)
            self.insert_many(DataObject(**s) for s in csvdata)
        finally:
            if close_on_exit:
                csv_source.close()

    def csv_export(self, csv_dest, fieldnames=None):
        """Exports the contents of the table to a CSV-formatted file.
        Exporting an empty table writes just the header line, or nothing at
        all if no field names were given.
        @param csv_dest: CSV file - if a string is given, the file with that name will be
            opened, written, and closed; if a file object is given, then that object
            will be written as-is, and left for the caller to be closed.
        @type csv_dest: string or file
        @param fieldnames: attribute names to be exported; can be given as a single
            string with space-delimited names, or as a list of attribute names;
            if omitted, the attribute names of the first object are used
        """
        close_on_exit = False
        if isinstance(csv_dest, basestring):
            if sys.version_info[0] >= 3:
                # on Python 3 the csv module writes text and does its own
                # line-ending handling
                csv_dest = open(csv_dest, 'w', newline='')
            else:
                csv_dest = open(csv_dest, 'wb')
            close_on_exit = True
        try:
            if fieldnames is None:
                fieldnames = list(_object_attrnames(self.obs[0])) if self.obs else []
            if isinstance(fieldnames, basestring):
                fieldnames = fieldnames.split()

            if fieldnames:
                csv_dest.write(','.join(fieldnames) + '\n')
            csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
            # the first object decides how attribute values are read for all rows
            if self.obs and hasattr(self.obs[0], "__dict__"):
                for o in self.obs:
                    csvout.writerow(o.__dict__)
            else:
                for o in self.obs:
                    csvout.writerow(dict((fld, getattr(o, fld)) for fld in fieldnames))
        finally:
            if close_on_exit:
                csv_dest.close()
741 742
743 -class PivotTable(Table):
744 """Enhanced Table containing pivot results from calling table.pivot(). 745 """
746 - def __init__(self, parent, attr_val_path, attrlist):
747 """PivotTable initializer - do not create these directly, use 748 L{Table.pivot}. 749 """ 750 super(PivotTable,self).__init__() 751 self._attr_path = attr_val_path[:] 752 self._pivot_attrs = attrlist[:] 753 self._subtable_dict = {} 754 755 for k,v in parent._indexes.items(): 756 self._indexes[k] = v.copy_template() 757 if not attr_val_path: 758 self.insert_many(parent.obs) 759 else: 760 attr,val = attr_val_path[-1] 761 self.insert_many(parent.query(**{attr:val})) 762 parent._subtable_dict[val] = self 763 764 if len(attrlist) > 0: 765 this_attr = attrlist[0] 766 sub_attrlist = attrlist[1:] 767 ind = parent._indexes[this_attr] 768 self.subtables = [ PivotTable(self, 769 attr_val_path + [(this_attr,k)], 770 sub_attrlist) for k in sorted(ind.keys()) ] 771 else: 772 self.subtables = []
773
774 - def __getitem__(self,val):
775 if self._subtable_dict: 776 return self._subtable_dict[val] 777 else: 778 return super(PivotTable,self).__getitem__(val)
779
780 - def keys(self):
781 return sorted(self._subtable_dict.keys())
782
783 - def items(self):
784 return sorted(self._subtable_dict.items())
785
786 - def values(self):
787 return self._subtable_dict.values()
788
789 - def pivot_key(self):
790 """Return the set of attribute-value pairs that define the contents of this 791 table within the original source table. 792 """ 793 return self._attr_path
794
795 - def pivot_key_str(self):
796 """Return the pivot_key as a displayable string. 797 """ 798 return '/'.join("%s:%s" % (attr,key) for attr,key in self._attr_path)
799
800 - def has_subtables(self):
801 """Return whether this table has further subtables. 802 """ 803 return bool(self.subtables)
804
805 - def dump(self, out=sys.stdout, row_fn=repr, maxrecs=-1, indent=0):
806 """Dump out the contents of this table in a nested listing. 807 @param out: output stream to write to 808 @param row_fn: function to call to display individual rows 809 @param maxrecs: number of records to show at deepest level of pivot (-1=show all) 810 @param indent: current nesting level 811 """ 812 NL = '\n' 813 if indent: 814 out.write(" "*indent + self.pivot_key_str()) 815 else: 816 out.write("Pivot: %s" % ','.join(self._pivot_attrs)) 817 out.write(NL) 818 if self.has_subtables(): 819 for sub in self.subtables: 820 if sub: 821 sub.dump(out, row_fn, maxrecs, indent+1) 822 else: 823 if maxrecs >= 0: 824 showslice = slice(0,maxrecs) 825 else: 826 showslice = slice(None,None) 827 for r in self.obs[showslice]: 828 out.write(" "*(indent+1) + row_fn(r) + NL) 829 out.flush()
830
831 - def dump_counts(self, out=sys.stdout):
832 """Dump out the summary counts of entries in this pivot table as a tabular listing. 833 @param out: output stream to write to 834 """ 835 if len(self._pivot_attrs) == 1: 836 out.write("Pivot Summary: %s\n" % ','.join(self._pivot_attrs)) 837 maxkeylen = max(len(str(k)) for k in self.keys()) 838 for sub in self.subtables: 839 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1])) 840 out.write("%7d\n" % len(sub)) 841 elif len(self._pivot_attrs) == 2: 842 out.write("Pivot Summary: %s\n" % ','.join(self._pivot_attrs)) 843 maxkeylen = max(max(len(str(k)) for k in self.keys()),5) 844 maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()),7) 845 keytally = dict((k,0) for k in self.subtables[0].keys()) 846 out.write("%*s " % (maxkeylen,'')) 847 out.write(' '.join("%*.*s" % (maxvallen,maxvallen,k) for k in self.subtables[0].keys())) 848 out.write(' Total\n') 849 for sub in self.subtables: 850 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1])) 851 for ssub in sub.subtables: 852 out.write("%*d " % (maxvallen,len(ssub))) 853 keytally[ssub._attr_path[-1][1]] += len(ssub) 854 out.write("%7d\n" % len(sub)) 855 out.write('%-*.*s ' % (maxkeylen,maxkeylen,"Total")) 856 out.write(' '.join("%*d" % (maxvallen,tally) for k,tally in sorted(keytally.items()))) 857 out.write(" %7d\n" % sum(tally for k,tally in keytally.items())) 858 else: 859 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
860
def summary_counts(self):
    """Dump out the summary counts of this pivot table as a Table.

    The returned Table is indexed on each pivot attribute and holds one
    DataObject per deepest-level subtable; each object carries the pivot
    attribute values for that subtable plus a C{Count} attribute with the
    number of records in it.

    @return: a new Table of per-subtable counts
    @raise ValueError: if this pivot does not have 1, 2 or 3 pivot attributes
    """
    depth = len(self._pivot_attrs)
    # validate up front so an unsupported pivot fails before any work is done
    if depth not in (1, 2, 3):
        raise ValueError("can only dump summary counts for 1, 2, or 3-attribute pivots")

    ret = Table()
    for attr in self._pivot_attrs:
        ret.create_index(attr)

    if depth == 1:
        # only the last path element is reported for a single-attribute pivot
        for sub in self.subtables:
            subattr, subval = sub._attr_path[-1]
            ret.insert(DataObject(**{subattr: subval, 'Count': len(sub)}))
    else:
        # descend to the deepest subtables, preserving traversal order
        leaves = list(self.subtables)
        for _ in range(depth - 1):
            leaves = [child for parent in leaves for child in parent.subtables]
        for leaf in leaves:
            attrdict = dict(leaf._attr_path)
            attrdict['Count'] = len(leaf)
            ret.insert(DataObject(**attrdict))
    return ret
888
class JoinTerm(object):
    """Temporary object created while composing a join across tables using
    L{Table.join_on} and '+' addition. JoinTerm's are usually created by
    calling join_on on a Table object, as in::

      customers.join_on("id") + orders.join_on("custid")

    This join expression would set up the join relationship
    equivalent to::

      customers.join(orders, id="custid")

    If tables are being joined on attributes that have the same name in
    both tables, then a join expression could be created by adding a
    JoinTerm of one table directly to the other table::

      customers.join_on("custid") + orders

    Once the join expression is composed, the actual join is performed
    using function call notation::

      customerorders = customers.join_on("custid") + orders
      for custord in customerorders():
          print(custord)

    When calling the join expression, you can optionally specify a
    list of attributes as defined in L{Table.join}.
    """
    def __init__(self, sourceTable, joinfield):
        """Create a join term for one table.
        @param sourceTable: the table on the left side of the join
        @param joinfield: name of the attribute in sourceTable to join on
        """
        self.sourcetable = sourceTable
        self.joinfield = joinfield
        # right-hand side of the join; filled in by '+' (a JoinTerm or a Table)
        self.jointo = None

    def __add__(self, other):
        """Attach C{other} as the right-hand side of this join expression.
        A Table is first wrapped as a JoinTerm on this term's join field.
        If this term already has a right-hand side, the pending join is
        evaluated and the addition is applied to its result.
        @raise ValueError: if other is neither a Table nor a JoinTerm
        """
        if isinstance(other, Table):
            other = other.join_on(self.joinfield)
        if isinstance(other, JoinTerm):
            if self.jointo is None:
                if other.jointo is None:
                    self.jointo = other
                else:
                    # other is itself a composed join - evaluate it first
                    self.jointo = other()
                return self
            else:
                if other.jointo is None:
                    return self() + other
                else:
                    return self() + other()
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __radd__(self, other):
        """Support C{table + jointerm} by wrapping the table as a JoinTerm
        on this term's join field.
        @raise ValueError: if other is not a Table
        """
        if isinstance(other, Table):
            return other.join_on(self.joinfield) + self
        raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)

    def __call__(self, attrs=None):
        """Perform the join and return its result.
        @param attrs: optional list of attributes, passed through to the
            source table's join method
        @return: result of the source table's join, or of its
            query() if nothing has been added to this term
        """
        # Compare against None rather than testing truthiness: jointo may be a
        # table, and an empty table must still be joined against, not skipped.
        if self.jointo is not None:
            other = self.jointo
            if isinstance(other, Table):
                other = other.join_on(self.joinfield)
            ret = self.sourcetable.join(other.sourcetable, attrs,
                                        **{self.joinfield: other.joinfield})
            return ret
        else:
            return self.sourcetable.query()

    def join_on(self, col):
        """Evaluate this join expression and start a new join term on
        attribute C{col} of the result.
        """
        return self().join_on(col)
if __name__ == "__main__":
    # Demo / smoke test: builds two small tables and exercises query, unique
    # index enforcement, join expressions and pivot summaries.
    #
    # All output uses single-argument print(...) calls, which behave the same
    # whether print is the Python 2 statement or the Python 3 function.

    # import json in Python 2 or 3 compatible forms
    from functools import partial
    try:
        import simplejson as json
        json_dumps = partial(json.dumps, indent=' ')
    except ImportError:
        import json
        json_dumps = partial(json.dumps, indent=2)

    # one record per line, fields separated by ':'; each field is stripped
    # when loaded, so the leading indentation here is not significant
    rawdata = """\
    Phoenix:AZ:85001:KPHX
    Phoenix:AZ:85001:KPHY
    Phoenix:AZ:85001:KPHA
    Dallas:TX:75201:KDFW""".splitlines()

    # load miniDB
    stations = Table()
    #~ stations.create_index("city")
    stations.create_index("stn", unique=True)

    fields = "city state zip stn".split()
    for d in rawdata:
        ob = DataObject()
        for k, v in zip(fields, d.split(':')):
            setattr(ob, k, v.strip())
        stations.insert(ob)

    # perform some queries and deletes
    for queryargs in [
        dict(city="Phoenix"),
        dict(city="Phoenix", stn="KPHX"),
        dict(stn="KPHA", city="Phoenix"),
        dict(state="TX"),
        dict(city="New York"),
        dict(city="Phoenix", _orderby="stn"),
        dict(city="Phoenix", _orderbydesc="stn"),
        ]:
        result = stations.query(**queryargs)
        # query arguments and match count on a single line
        print("%s %d" % (queryargs, len(result)))
        for r in result:
            print(r)
        print("")
    #~ print stations.delete(city="Phoenix")
    #~ print stations.delete(city="Boston")
    print(list(stations.query()))
    print("")

    amfm = Table()
    amfm.create_index("stn", unique=True)
    amfm.insert(DataObject(stn="KPHY", band="AM"))
    amfm.insert(DataObject(stn="KPHX", band="FM"))
    amfm.insert(DataObject(stn="KPHA", band="FM"))
    amfm.insert(DataObject(stn="KDFW", band="FM"))

    # "stn" is a unique index, so re-inserting KPHA is expected to fail
    try:
        amfm.insert(DataObject(stn="KPHA", band="AM"))
    except KeyError:
        print("duplicate key not allowed")

    # join with renamed output attributes, then order the joined result
    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band", "AMFM"),
                   (stations, "state", "st")]).query(_orderby="AMFM"):
        print(repr(rec))

    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band"), (stations, "state", "st")]):
        print(json_dumps(rec.__dict__))

    # join with no attribute list given
    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
        print(json_dumps(rec.__dict__))

    # single-attribute pivot summary
    print("")
    stations.create_index("state")
    pivot = stations.pivot("state")
    pivot.dump_counts()

    # two-attribute pivot summary over the joined table
    print("")
    amfm.create_index("band")
    pivot = (stations.join_on("stn") + amfm)().pivot("state band")
    pivot.dump_counts()