1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 __doc__ = """\
32
33 C{littletable} - a Python module to give ORM-like access to a collection of objects
34
35 The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a
36 collection of user objects. C{littletable} provides a L{DataObject} class for ad hoc creation
37 of semi-immutable objects that can be stored in a C{littletable} L{Table}.
38
39 In addition to basic ORM-style insert/remove/query/delete access to the contents of a
40 Table, C{littletable} offers:
41 - simple indexing for improved retrieval performance, and optional enforcing key uniqueness
42 - access to objects using indexed attributes
43 - simplified joins using '+' operator syntax between annotated Tables
44 - the result of any query or join is a new first-class C{littletable} Table
45
46 C{littletable} Tables do not require an upfront schema definition, but simply work off of the
47 attributes in the stored values, and those referenced in any query parameters.
48
49 Here is a simple C{littletable} data storage/retrieval example::
50
51 from littletable import Table, DataObject
52
53 customers = Table('customers')
54 customers.create_index("id", unique=True)
55 customers.insert(DataObject(id="0010", name="George Jetson"))
56 customers.insert(DataObject(id="0020", name="Wile E. Coyote"))
57 customers.insert(DataObject(id="0030", name="Jonny Quest"))
58
59 catalog = Table('catalog')
60 catalog.create_index("sku", unique=True)
61 catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100))
62 catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3))
63 catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8))
64 catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12))
65
66 wishitems = Table('wishitems')
67 wishitems.create_index("custid")
68 wishitems.create_index("sku")
69 wishitems.insert(DataObject(custid="0020", sku="ANVIL-001"))
70 wishitems.insert(DataObject(custid="0020", sku="BRDSD-001"))
71 wishitems.insert(DataObject(custid="0020", sku="MAGNT-001"))
72 wishitems.insert(DataObject(custid="0030", sku="MAGNT-001"))
73 wishitems.insert(DataObject(custid="0030", sku="MAGLS-001"))
74
75 # print a particular customer name
76 # (unique indexes will return a single item; non-unique
77 # indexes will return a list of all matching items)
78 print customers.id["0030"].name
79
80 # print all items sold by the pound
81 for item in catalog.query(unitofmeas="LB"):
82 print item.sku, item.descr
83
84 # print all items that cost more than 10
85 for item in catalog.where(lambda o : o.unitprice>10):
86 print item.sku, item.descr, item.unitprice
87
88 # join tables to create queryable wishlists collection
89 wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku")
90
91 # print all wishlist items with price > 10
92 bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10)
93 for item in bigticketitems:
94 print item
95
96 # list all wishlist items in descending order by price
97 for item in wishlists().query(_orderby="unitprice desc"):
98 print item
99 """
100
101 __version__ = "0.4"
102 __versionTime__ = "29 Jun 2011 16:36"
103 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
104
105 import sys
106 from collections import defaultdict
107 from itertools import groupby,ifilter,islice,starmap,repeat
108 import csv
109
# Interpreters whose itertools has no product() get a minimal two-sequence
# stand-in; only the two-argument form is used in this module.
try:
    from itertools import product
except ImportError:
    def product(aseq, bseq):
        """Yield every C{(a, b)} pair, iterating C{bseq} once per item of C{aseq}.

        C{bseq} is re-iterated for each element of C{aseq}, so it must be a
        re-iterable sequence rather than a one-shot iterator.
        """
        for a in aseq:
            for b in bseq:
                yield a, b

# Interpreters without a basestring builtin get it aliased to str, so that
# isinstance(x, basestring) checks in this module keep working.
try:
    basestring
except NameError:
    basestring = str
122
123 __all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"]
124
def _object_attrnames(obj):
    """Return the names of the data attributes carried by C{obj}.

    Three kinds of object are recognized, checked in this order:
     - objects with a C{__dict__}: the keys of that dict
     - tuples with a C{_fields} attribute (namedtuple-style): C{_fields}
     - objects declaring C{__slots__}: the C{__slots__} value as declared

    @param obj: object to inspect
    @return: an iterable of attribute names
    @raise ValueError: if C{obj} matches none of the recognized kinds
    """
    if hasattr(obj, "__dict__"):
        # ordinary instance
        return obj.__dict__.keys()
    if isinstance(obj, tuple) and hasattr(obj, "_fields"):
        # namedtuple-style record
        return obj._fields
    if hasattr(obj, "__slots__"):
        return obj.__slots__
    raise ValueError("object with unknown attributes")
136
138 """A generic semi-mutable object for storing data values in a table. Attributes
139 can be set by passing in named arguments in the constructor, or by setting them
140 as C{object.attribute = value}. New attributes can be added any time, but updates
141 are ignored. Table joins are returned as a Table of DataObjects."""
143 if kwargs:
144 self.__dict__.update(kwargs)
146 return repr(self.__dict__)
152 if hasattr(self,k):
153 return getattr(self,k)
154 else:
155 raise KeyError("object has no such attribute " + k)
156
159 self.attr = attr
160 self.obs = defaultdict(list)
161 self.is_unique = False
163 self.obs[k].append(v)
165 return self.obs.get(k,[])
169 return iter(self.obs)
171 return sorted(self.obs.keys())
173 return self.obs.items()
175 try:
176 k = getattr(obj, self.attr)
177 self.obs[k].remove(obj)
178 except (ValueError,AttributeError,KeyError):
179 pass
181 return key in self.obs
183 return self.__class__(self.attr)
184
def __init__(self, attr, accept_none=False):
    """Set up an empty unique index on the attribute C{attr}.

    @param attr: name of the object attribute this index is keyed on
    @type attr: string
    @param accept_none: whether objects without a usable key are allowed
    @type accept_none: boolean
    """
    self.attr = attr
    self.accept_none = accept_none
    self.is_unique = True
    # key value -> the single object stored under that key
    self.obs = {}
    # objects stored without a key
    self.none_values = set()
193 if k:
194 if k not in self.obs:
195 self.obs[k] = v
196 else:
197 raise KeyError("duplicate key value %s" % k)
198 else:
199 self.none_values.add(v)
201 if k:
202 return [self.obs.get(k)] if k in self.obs else []
203 else:
204 return list(self.none_values)
206 if k:
207 return k in self.obs
208 else:
209 return self.accept_none and self.none_values
211 return sorted(self.obs.keys()) + ([None,] if self.none_values else [])
213 return [(k,[v]) for k,v in self.obs.items()]
215 k = getattr(obj, self.attr)
216 if k:
217 if k in self.obs:
218 del self.obs[k]
219 else:
220 self.none_values.discard(obj)
221
226 return getattr(self._index, attr)
228 ret = Table()
229 if k in self._index:
230 ret.insert_many(self._index[k])
231 return ret
233 return k in self._index
234
239 return getattr(self._index, attr)
241 return k in self._index
243 if k:
244 return self._index[k][0]
245 else:
246 ret = Table()
247 if k in self._index:
248 ret.insert_many(self._index[k])
249 return ret
250
251
253 """Table is the main class in C{littletable}, for representing a collection of DataObjects or
254 user-defined objects with publicly accessible attributes or properties. Tables can be:
255 - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>}
256 - indexed, with multiple indexes, with unique or non-unique values, see L{create_index}
257 - queried, specifying values to exact match in the desired records, see L{query}
258 - filtered (using L{where}), using a simple predicate function to match desired records;
259 useful for selecting using inequalities or compound conditions
260 - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__}
261 - joined, using L{join_on} to identify attribute to be used for joining with another table, and
262 L{join} or operator '+' to perform the actual join
263 - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects
264 by attribute values
265 - L{imported<csv_import>}/L{exported<csv_export>} to CSV-format files
266 Queries and joins return their results as new Table objects, so that queries and joins can
267 be easily performed as a succession of operations.
268 """
270 """Create a new, empty Table.
271 @param table_name: name for Table
272 @type table_name: string (optional)
273 """
274 self.table_name = table_name
275 self.obs = []
276 self._indexes = {}
277
279 """Return the number of objects in the Table."""
280 return len(self.obs)
281
283 """Create an iterator over the objects in the Table."""
284 return iter(self.obs)
285
287 """Provides direct indexed/sliced access to the Table's underlying list of objects."""
288 return self.obs[i]
289
291 """A quick way to query for matching records using their indexed attributes. The attribute
292 name is used to locate the index, and returns a wrapper on the index. This wrapper provides
293 dict-like access to the underlying records in the table, as in::
294
295 employees.socsecnum["000-00-0000"]
296 customers.zipcode["12345"]
297
298 The behavior differs slightly for unique and non-unique indexes:
299 - if the index is unique, then retrieving a matching object, will return just the object;
300 if there is no matching object, C{KeyError} is raised
301 - if the index is non-unique, then all matching objects will be returned in a new Table,
302 just as if a regular query had been performed; if no objects match the key value, an empty
303 Table is returned and no exception is raised.
304
305 If there is no index defined for the given attribute, then C{AttributeError} is raised.
306 """
307 if attr in self._indexes:
308 ret = self._indexes[attr]
309 if isinstance(ret, _UniqueObjIndex):
310 ret = _UniqueObjIndexWrapper(ret)
311 if isinstance(ret, _ObjIndex):
312 ret = _ObjIndexWrapper(ret)
313 return ret
314 raise AttributeError("Table '%s' has no index '%s'" %
315 (self.table_name, attr))
316
318 return bool(self.obs)
319
320 __nonzero__ = __bool__
321
323 """A simple way to assign a name to a table, such as those
324 dynamically created by joins and queries.
325 @param table_name: name for Table
326 @type table_name: string
327 """
328 self.table_name = table_name
329 return self
330
332 """Create empty copy of the current table, with copies of all
333 index definitions.
334 """
335 ret = Table(self.table_name)
336 for k,v in self._indexes.items():
337 ret._indexes[k] = v.copy_template()
338 return ret
339
341 """Create full copy of the current table, including table contents
342 and index definitions.
343 """
344 ret = self.copy_template()
345 ret.insert_many(self.obs)
346 return ret
347
def create_index(self, attr, unique=False, accept_none=False):
    """Build an index over C{attr} for the objects currently in the table.

    A table that already has an index on C{attr} is left untouched and no
    exception is raised.  If the index cannot be populated (a duplicate
    value for a unique index, or a missing/empty value when C{accept_none}
    is false), the half-built index is discarded and C{KeyError} is raised.

    @param attr: the attribute to be used for indexed access and joins
    @type attr: string
    @param unique: flag indicating whether the indexed field values are
        expected to be unique across table entries
    @type unique: boolean
    @param accept_none: flag indicating whether None is an acceptable
        value for this attribute; always treated as True for non-unique
        indexes
    @type accept_none: boolean
    """
    if attr in self._indexes:
        return

    if unique:
        new_index = _UniqueObjIndex(attr, accept_none)
    else:
        new_index = _ObjIndex(attr)
        accept_none = True
    self._indexes[attr] = new_index

    try:
        for rec in self.obs:
            # a missing attribute and any falsy value are both keyed as None
            if hasattr(rec, attr):
                key = getattr(rec, attr) or None
            else:
                key = None
            if key is None and not accept_none:
                raise KeyError("None is not an allowed key")
            new_index[key] = rec
    except KeyError:
        # never leave a partially populated index registered
        del self._indexes[attr]
        raise
387
389 """Deletes an index from the Table. Can be used to drop and rebuild an index,
390 or to convert a non-unique index to a unique index, or vice versa.
391 @param attr: name of an indexed attribute
392 @type attr: string
393 """
394 if attr in self._indexes:
395 del self._indexes[attr]
396
398 """Insert a new object into this Table.
399 @param obj: any Python object
400 Objects can be constructed using the defined DataObject type, or they can
401 be any Python object that does not use the Python C{__slots__} feature; C{littletable}
402 introspect's the object's C{__dict__} or C{_fields} attributes to obtain join and
403 index attributes and values.
404
405 If the table contains a unique index, and the record to be inserted would add
406 a duplicate value for the indexed attribute, then C{KeyError} is raised, and the
407 object is not inserted.
408
409 If the table has no unique indexes, then it is possible to insert duplicate
410 objects into the table.
411 """
412
413
414 uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
415 if any((getattr(obj, ind.attr, None) is None and not ind.accept_none)
416 or (
417 hasattr(obj, ind.attr) and getattr(obj, ind.attr) in ind
418 )
419 for ind in uniqueIndexes):
420
421 for ind in uniqueIndexes:
422 if (getattr(obj, ind.attr, None) is None and not ind.accept_none):
423 raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj)
424 if getattr(obj, ind.attr) in ind:
425 raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj)
426
427 self.obs.append(obj)
428 for attr, ind in self._indexes.items():
429 obval = getattr(obj, attr)
430 ind[obval] = obj
431
433 """Inserts a collection of objects into the table."""
434 for ob in it:
435 self.insert(ob)
436
def remove(self, ob):
    """Removes an object from the table. If object is not in the table, then
    no action is taken and no exception is raised.

    @param ob: the object to remove
    """
    # Take the object out of the row list first: when it is not a member,
    # list.remove raises ValueError and the indexes must stay untouched.
    try:
        self.obs.remove(ob)
    except ValueError:
        return

    # the object was a member, so purge it from every index as well
    for ind in self._indexes.values():
        ind.remove(ob)
446
448 """Removes a collection of objects from the table."""
449 for ob in it:
450 self.remove(ob)
451
453 attr,v = attr_val
454 if attr in self._indexes:
455 idx = self._indexes[attr]
456 if v in idx:
457 return len(idx[v])
458 else:
459 return 0
460 else:
461 return 1e9
462
def query(self, **kwargs):
    """Retrieves matching objects from the table, based on given
    named parameters.  If multiple named parameters are given, then
    only objects that satisfy all of the query criteria will be returned.
    With no criteria at all, a full copy of the table is returned.

    Special kwargs:
     - C{_orderby="attr,..."} - resulting table should sort content objects
       by the C{attr}s given in a comma-separated string; to sort in
       descending order, reference the attribute as C{attr desc}.

    Any other keyword whose name starts with an underscore is treated as a
    flag rather than a match criterion, and is ignored.

    @param **kwargs: attributes for selecting records, given as additional
        named arguments of the form C{attrname="attrvalue"}.
    @return: a new Table containing the matching objects
    """
    # Names starting with "_" are control flags, not match criteria.
    flags = [(k, v) for k, v in kwargs.items() if k.startswith("_")]
    for flag, _ in flags:
        del kwargs[flag]

    if kwargs:
        ret = self.copy_template()
        criteria = list(kwargs.items())
        # For larger tables, let the sort helper decide which criterion is
        # applied first, so later passes work on fewer rows.
        if len(criteria) > 1 and len(self.obs) > 100:
            criteria = sorted(criteria, key=self._query_attr_sort_fn)

        first = True
        for k, v in criteria:
            if first:
                # seed the result from the source table
                if k in self._indexes:
                    ret.insert_many(self._indexes[k][v])
                else:
                    ret.insert_many(r for r in self.obs
                                    if hasattr(r, k) and getattr(r, k) == v)
                first = False
            elif k in ret._indexes:
                # narrow through the result's own index
                newret = ret.copy_template()
                newret.insert_many(ret._indexes[k][v])
                ret = newret
            else:
                # narrow by discarding rows that fail this criterion; the
                # list is built up front so ret.obs is not mutated while
                # it is being scanned
                ret.remove_many([o for o in ret.obs
                                 if not hasattr(o, k) or getattr(o, k) != v])
    else:
        ret = self.clone()

    for flag, value in flags:
        if flag == "_orderby":
            attrs = [s.strip() for s in value.split(',')]
            # each entry is "attr" or "attr desc"; sorting runs from the
            # last key to the first so the stable sort yields a
            # multi-key ordering
            attr_orders = [(a.split() + ['asc'])[:2] for a in attrs][::-1]
            for attr, order in attr_orders:
                ret.obs.sort(key=lambda ob, attr=attr: getattr(ob, attr),
                             reverse=(order == "desc"))

    return ret
524
526 """Deletes matching objects from the table, based on given
527 named parameters. If multiple named parameters are given, then
528 only objects that satisfy all of the query criteria will be removed.
529 @param **kwargs: attributes for selecting records, given as additional
530 named arguments of the form C{attrname="attrvalue"}.
531 @return: the number of objects removed from the table
532 """
533 if not kwargs:
534 return 0
535
536 affected = self.query(**kwargs)
537 self.remove_many(affected)
538 return len(affected)
539
def where(self, wherefn, maxrecs=0):
    """An alternative to L{query}, using a matching predicate function to
    determine whether a given object matches the query or not.  You must use
    C{where} in place of C{query} if you want to query using inequalities or more
    complex matching criteria than simple C{attribute=value}.
    @param wherefn: a method or lambda that returns a boolean result, as in::

        lambda ob : ob.unitprice > 10

        If C{None} is given, objects are kept according to their own
        truth value.
    @type wherefn: callable(object) returning boolean
    @param maxrecs: if only the first 'n' records are needed, then C{where} will
        stop after locating 'n' matching records; 0 means no limit
    @type maxrecs: int
    @returns: a new Table containing the matching records
    """
    if wherefn is None:
        wherefn = bool
    ret = self.copy_template()
    # A lazy generator means a maxrecs limit really does stop the scan
    # early, and avoids depending on the Python 2-only itertools.ifilter.
    matches = (ob for ob in self.obs if wherefn(ob))
    if maxrecs:
        matches = islice(matches, 0, maxrecs)
    ret.insert_many(matches)
    return ret
561
def join(self, other, attrlist=None, **kwargs):
    """
    Join the objects of one table with the objects of another, based on the given
    matching attributes in the named arguments.  The attrlist specifies the attributes to
    be copied from the source tables - if omitted, all attributes will be copied.  Entries
    in the attrlist may be single attribute names, or if there are duplicate names in both
    tables, then a C{(table,attributename)} tuple can be given to disambiguate which
    attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to
    rename an attribute from a source table.

    This method may be called directly, or can be constructed using the L{join_on} method and
    the '+' operator.  Using this syntax, the join is specified using C{table.join_on("xyz")}
    to create a JoinTerm containing both table and joining attribute.  Multiple JoinTerm
    or tables can be added to construct a compound join expression.  When complete, the
    join expression gets executed by calling the resulting join definition,
    using C{join_expression([attrlist])}.

    @param other: other table to join to
    @param attrlist: list of attributes to be copied to the new joined table; if
        none provided, all attributes of both tables will be used (taken from the first
        object in each table); plain names found in neither table are skipped
    @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples
        (list may contain both strings and tuples)
    @param **kwargs: attributes to join on, given as additional named arguments
        of the form C{table1attr="table2attr"}, or a dict mapping attribute names;
        only one pair is used
    @returns: a new Table containing the joined data as new DataObjects
    @raise IndexError: if no join attribute is given
    @raise ValueError: if both tables have rows but either join attribute
        is not indexed in its table
    """
    if not kwargs:
        # same exception type the unguarded lookup used to produce
        raise IndexError("join requires a join attribute, given as thisattr='otherattr'")
    thiscol, othercol = next(iter(kwargs.items()))

    retname = ("(%s:%s^%s:%s)" %
               (self.table_name, thiscol, other.table_name, othercol))

    # joining with an empty table can only produce an empty result
    if not (self.obs and other.obs):
        return Table(retname)

    if isinstance(attrlist, basestring):
        attrlist = attrlist.split()

    # Expand attrlist into (table, source attribute, result attribute)
    # triples; attribute names are taken from the first row of each table.
    thisnames = set(_object_attrnames(self.obs[0]))
    othernames = set(_object_attrnames(other.obs[0]))
    fullcols = []
    if attrlist is not None:
        for col in attrlist:
            if isinstance(col, tuple):
                # (table, attr) becomes (table, attr, attr); a 3-tuple
                # already carrying an alias passes through unchanged
                fullcols.append((col + (col[1],))[:3])
            elif col in thisnames:
                fullcols.append((self, col, col))
            elif col in othernames:
                fullcols.append((other, col, col))
            # names found in neither table are silently skipped
    else:
        fullcols = [(self, n, n) for n in thisnames]
        fullcols += [(other, n, n) for n in othernames]

    thiscols = [c for c in fullcols if c[0] is self]
    othercols = [c for c in fullcols if c[0] is other]

    # Test for presence with "is None": an index object may define
    # __len__, so a truthiness test could mistake a present index for a
    # missing one.
    thiscolindex = self._indexes.get(thiscol)
    othercolindex = other._indexes.get(othercol)
    if thiscolindex is None or othercolindex is None:
        raise ValueError("can only join on indexed attributes")

    # walk the index with fewer keys and probe the other one
    if len(thiscolindex) < len(othercolindex):
        shortindex, longindex = (thiscolindex, othercolindex)
        swap = False
    else:
        shortindex, longindex = (othercolindex, thiscolindex)
        swap = True

    # collect (rows of self, rows of other) pairs sharing a key value
    matchingrows = []
    for key, rows in shortindex.items():
        if key in longindex:
            if swap:
                matchingrows.append((longindex[key], rows))
            else:
                matchingrows.append((rows, longindex[key]))

    # build one DataObject for every pairing of rows under the same key
    joinrows = []
    for thisrows, otherrows in matchingrows:
        for trow, orow in product(thisrows, otherrows):
            retobj = DataObject()
            for _, c, a in thiscols:
                setattr(retobj, a, getattr(trow, c))
            for _, c, a in othercols:
                setattr(retobj, a, getattr(orow, c))
            joinrows.append(retobj)

    # indexed source attributes get a (non-unique) index in the result too
    ret = Table(retname)
    for tbl, collist in zip([self, other], [thiscols, othercols]):
        for _, c, a in collist:
            if c in tbl._indexes:
                ret.create_index(a)
    ret.insert_many(joinrows)
    return ret
666
668 """Creates a JoinTerm in preparation for joining with another table, to
669 indicate what attribute should be used in the join. Only indexed attributes
670 may be used in a join.
671 @param attr: attribute name to join from this table (may be different
672 from the attribute name in the table being joined to)
673 @type attr: string
674 @returns: L{JoinTerm}"""
675 if attr not in self._indexes:
676 raise ValueError("can only join on indexed attributes")
677 return JoinTerm(self, attr)
678
def pivot(self, attrlist):
    """Group the table's contents by the given attributes, as a L{PivotTable}.
    @param attrlist: list of attributes to be used to construct the pivot table;
        every one of them must already be indexed in this table
    @type attrlist: list of strings, or string of space-delimited attribute names
    @raise ValueError: if any of the attributes has no index
    """
    if isinstance(attrlist, basestring):
        attrlist = attrlist.split()
    for name in attrlist:
        if name not in self._indexes:
            raise ValueError("pivot can only be called using indexed attributes")
    return PivotTable(self, [], attrlist)
690
def csv_import(self, csv_source, transforms=None):
    """Imports the contents of a CSV-formatted file into this table.

    Transforms are applied to each imported row before the row is inserted,
    so indexes are keyed on the transformed values, and rows that were
    already in the table are left alone.

    @param csv_source: CSV file - if a string is given, the file with that name will be
        opened, read, and closed; if a file object is given, then that object
        will be read as-is, and left for the caller to be closed.
    @type csv_source: string or file
    @param transforms: dict of functions by attribute name; if given, each
        attribute will be transformed using the corresponding transform; if there is no
        matching transform, the attribute will be read as a string (default); the
        transform function can also be defined as a (function, default-value) tuple; if
        there is an Exception raised by the transform function, then the attribute will
        be set to the given default value
    @type transforms: dict (optional)
    """
    # normalize every transform to an (attribute, function, default) triple
    xforms = []
    for attr, fn in (transforms or {}).items():
        default = None
        if isinstance(fn, tuple):
            fn, default = fn
        xforms.append((attr, fn, default))

    def convert(row):
        # A failing transform - including a column missing from the row -
        # yields the transform's default value.
        for attr, fn, default in xforms:
            try:
                row[attr] = fn(row[attr])
            except Exception:
                row[attr] = default
        return row

    close_on_exit = False
    if isinstance(csv_source, basestring):
        csv_source = open(csv_source)
        close_on_exit = True
    try:
        csvdata = csv.DictReader(csv_source)
        self.insert_many(DataObject(**convert(row)) for row in csvdata)
    finally:
        # only close files this method opened itself
        if close_on_exit:
            csv_source.close()
722
724 """Exports the contents of the table to a CSV-formatted file.
725 @param csv_dest: CSV file - if a string is given, the file with that name will be
726 opened, written, and closed; if a file object is given, then that object
727 will be written as-is, and left for the caller to be closed.
728 @type csv_dest: string or file
729 @param fieldnames: attribute names to be exported; can be given as a single
730 string with space-delimited names, or as a list of attribute names
731 """
732 close_on_exit = False
733 if isinstance(csv_dest, basestring):
734 csv_dest = open(csv_dest,'wb')
735 close_on_exit = True
736 try:
737 if fieldnames is None:
738 fieldnames = list(_object_attrnames(self.obs[0]))
739 if isinstance(fieldnames, basestring):
740 fieldnames = fieldnames.split()
741
742 csv_dest.write(','.join(fieldnames) + '\n')
743 csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
744 if hasattr(self.obs[0], "__dict__"):
745 for o in self.obs:
746 csvout.writerow(o.__dict__)
747 else:
748 for o in self.obs:
749 row = dict(starmap(lambda obj, fld: (fld, getattr(obj, fld)),
750 zip(repeat(o), fieldnames)))
751 csvout.writerow(row)
752 finally:
753 if close_on_exit:
754 csv_dest.close()
755
def compute(self, attrname, fn, default=None):
    """Sets C{attrname} on every object in the table to a computed value,
    adding the attribute or overwriting one that is already there.

    NOTE(review): the table's indexes are not touched here, so an index on
    C{attrname} is not refreshed after the values change.

    @param attrname: attribute to compute for each object
    @type attrname: string
    @param fn: function used to compute new attribute value, based on
        other values in the object
    @type fn: function(obj) returns value
    @param default: value to use if an exception is raised while trying
        to evaluate fn
    """
    for rec in self:
        try:
            newval = fn(rec)
        except Exception:
            newval = default
        # DataObjects are written through object.__setattr__, presumably to
        # bypass their own attribute-setting rules (their class docstring
        # says updates to existing attributes are ignored) - TODO confirm
        if isinstance(rec, DataObject):
            assign = object.__setattr__
        else:
            assign = setattr
        assign(rec, attrname, newval)
776
777
779 """Enhanced Table containing pivot results from calling table.pivot().
780 """
def __init__(self, parent, attr_val_path, attrlist):
    """PivotTable initializer - do not create these directly, use
    L{Table.pivot}.

    @param parent: table (or enclosing PivotTable) supplying the objects
    @param attr_val_path: list of C{(attribute, value)} pairs selecting this
        table's contents; empty for the top-level pivot
    @param attrlist: attributes still to be pivoted on below this level
    """
    super(PivotTable, self).__init__()
    self._attr_path = attr_val_path[:]
    self._pivot_attrs = attrlist[:]
    self._subtable_dict = {}

    # start from empty copies of the parent's index definitions
    for name, index in parent._indexes.items():
        self._indexes[name] = index.copy_template()

    if attr_val_path:
        # nested level: keep only the parent's rows matching the last
        # (attribute, value) pair, and register with the parent by value
        attr, val = attr_val_path[-1]
        self.insert_many(parent.query(**{attr: val}))
        parent._subtable_dict[val] = self
    else:
        self.insert_many(parent.obs)

    # one child table per key of the parent's index on the next attribute
    children = []
    if len(attrlist) > 0:
        this_attr = attrlist[0]
        sub_attrlist = attrlist[1:]
        for key in sorted(parent._indexes[this_attr].keys()):
            children.append(PivotTable(self,
                                       attr_val_path + [(this_attr, key)],
                                       sub_attrlist))
    self.subtables = children
808
810 if self._subtable_dict:
811 return self._subtable_dict[val]
812 else:
813 return super(PivotTable,self).__getitem__(val)
814
816 return sorted(self._subtable_dict.keys())
817
819 return sorted(self._subtable_dict.items())
820
822 return self._subtable_dict.values()
823
825 """Return the set of attribute-value pairs that define the contents of this
826 table within the original source table.
827 """
828 return self._attr_path
829
831 """Return the pivot_key as a displayable string.
832 """
833 return '/'.join("%s:%s" % (attr,key) for attr,key in self._attr_path)
834
836 """Return whether this table has further subtables.
837 """
838 return bool(self.subtables)
839
def dump(self, out=sys.stdout, row_fn=repr, maxrecs=-1, indent=0):
    """Write this table's contents to C{out} as a nested listing: a heading
    line, then either each non-empty subtable (recursively, one level
    deeper) or, at the deepest level, the rows themselves.
    @param out: output stream to write to
    @param row_fn: function to call to display individual rows
    @param maxrecs: number of records to show at deepest level of pivot (-1=show all)
    @param indent: current nesting level
    """
    if indent:
        heading = " " * indent + self.pivot_key_str()
    else:
        # top level: name the pivot attributes instead of a key path
        heading = "Pivot: %s" % ','.join(self._pivot_attrs)
    out.write(heading)
    out.write('\n')

    if self.has_subtables():
        for child in self.subtables:
            # empty subtables are left out of the listing
            if child:
                child.dump(out, row_fn, maxrecs, indent + 1)
    else:
        shown = self.obs[:maxrecs] if maxrecs >= 0 else self.obs[:]
        prefix = " " * (indent + 1)
        for rec in shown:
            out.write(prefix + row_fn(rec) + '\n')
    out.flush()
865
867 """Dump out the summary counts of entries in this pivot table as a tabular listing.
868 @param out: output stream to write to
869 """
870 if len(self._pivot_attrs) == 1:
871 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
872 maxkeylen = max(len(str(k)) for k in self.keys())
873 for sub in self.subtables:
874 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1]))
875 out.write("%7d\n" % len(sub))
876 elif len(self._pivot_attrs) == 2:
877 out.write("Pivot: %s\n" % ','.join(self._pivot_attrs))
878 maxkeylen = max(max(len(str(k)) for k in self.keys()),5)
879 maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()),7)
880 keytally = dict((k,0) for k in self.subtables[0].keys())
881 out.write("%*s " % (maxkeylen,''))
882 out.write(' '.join("%*.*s" % (maxvallen,maxvallen,k) for k in self.subtables[0].keys()))
883 out.write(' Total\n')
884 for sub in self.subtables:
885 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1]))
886 for ssub in sub.subtables:
887 out.write("%*d " % (maxvallen,len(ssub)))
888 keytally[ssub._attr_path[-1][1]] += len(ssub)
889 out.write("%7d\n" % len(sub))
890 out.write('%-*.*s ' % (maxkeylen,maxkeylen,"Total"))
891 out.write(' '.join("%*d" % (maxvallen,tally) for k,tally in sorted(keytally.items())))
892 out.write(" %7d\n" % sum(tally for k,tally in keytally.items()))
893 else:
894 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
895
897 """Dump out the summary counts of this pivot table as a Table.
898 """
899 ret = Table()
900 topattr = self._pivot_attrs[0]
901 for attr in self._pivot_attrs:
902 ret.create_index(attr)
903 if len(self._pivot_attrs) == 1:
904 for sub in self.subtables:
905 subattr,subval = sub._attr_path[-1]
906 if fn is None:
907 ret.insert(DataObject(**{subattr:subval, 'Count':len(sub)}))
908 else:
909 attrdict[fn.__name__] = reduce(fn, (s[col] for s in sub))
910 elif len(self._pivot_attrs) == 2:
911 for sub in self.subtables:
912 for ssub in sub.subtables:
913 attrdict = dict(ssub._attr_path)
914 if fn is None:
915 attrdict['Count'] = len(ssub)
916 else:
917 attrdict[fn.__name__] = reduce(fn, (s[col] for s in ssub))
918 ret.insert(DataObject(**attrdict))
919 elif len(self._pivot_attrs) == 3:
920 for sub in self.subtables:
921 for ssub in sub.subtables:
922 for sssub in ssub.subtables:
923 attrdict = dict(sssub._attr_path)
924 if fn is None:
925 attrdict['Count'] = len(sssub)
926 else:
927 attrdict[fn.__name__] = reduce(fn, (s[col] for s in sssub))
928 ret.insert(DataObject(**attrdict))
929 else:
930 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
931 return ret
932
934 """Temporary object created while composing a join across tables using
935 L{Table.join_on} and '+' addition. JoinTerm's are usually created by
936 calling join_on on a Table object, as in::
937
938 customers.join_on("id") + orders.join_on("custid")
939
940 This join expression would set up the join relationship
941 equivalent to::
942
943 customers.join(orders, id="custid")
944
945 If tables are being joined on attributes that have the same name in
946 both tables, then a join expression could be created by adding a
947 JoinTerm of one table directly to the other table::
948
949 customers.join_on("custid") + orders
950
951 Once the join expression is composed, the actual join is performed
952 using function call notation::
953
954 customerorders = customers.join_on("custid") + orders
955 for custord in customerorders():
956 print custord
957
958 When calling the join expression, you can optionally specify a
959 list of attributes as defined in L{Table.join}.
960 """
def __init__(self, sourceTable, joinfield):
    """Record the table and the attribute it is to be joined on.

    @param sourceTable: table taking part in the join
    @param joinfield: name of the join attribute in C{sourceTable}
    @type joinfield: string
    """
    # no join partner until another term or table is added with '+'
    self.jointo = None
    self.joinfield = joinfield
    self.sourcetable = sourceTable
965
967 if isinstance(other, Table):
968 other = other.join_on(self.joinfield)
969 if isinstance(other, JoinTerm):
970 if self.jointo is None:
971 if other.jointo is None:
972 self.jointo = other
973 else:
974 self.jointo = other()
975 return self
976 else:
977 if other.jointo is None:
978 return self() + other
979 else:
980 return self() + other()
981 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
982
984 if isinstance(other, Table):
985 return other.join_on(self.joinfield) + self
986 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
987
989 if self.jointo:
990 other = self.jointo
991 if isinstance(other, Table):
992 other = other.join_on(self.joinfield)
993 ret = self.sourcetable.join(other.sourcetable, attrs,
994 **{self.joinfield : other.joinfield})
995 return ret
996 else:
997 return self.sourcetable.query()
998
1001
1002
if __name__ == "__main__":
    # Demonstration script: builds two small tables, then exercises
    # queries, unique-key enforcement, joins and pivots.  Every print call
    # takes a single argument, so the output is the same whether print is
    # a statement or a function.

    from functools import partial
    try:
        import simplejson as json
        json_dumps = partial(json.dumps, indent=' ')
    except ImportError:
        import json
        json_dumps = partial(json.dumps, indent=2)

    # city:state:zip:station records
    rawdata = """\
Phoenix:AZ:85001:KPHX
Phoenix:AZ:85001:KPHY
Phoenix:AZ:85001:KPHA
Dallas:TX:75201:KDFW""".splitlines()

    stations = Table()
    # station call letters must be unique
    stations.create_index("stn", unique=True)

    fields = "city state zip stn".split()
    for d in rawdata:
        ob = DataObject()
        for k, v in zip(fields, d.split(':')):
            setattr(ob, k, v.strip())
        stations.insert(ob)

    # a selection of queries, including ordered ones
    for queryargs in [
        dict(city="Phoenix"),
        dict(city="Phoenix", stn="KPHX"),
        dict(stn="KPHA", city="Phoenix"),
        dict(state="TX"),
        dict(city="New York"),
        dict(city="Phoenix", _orderby="stn"),
        # descending order is requested as "attr desc"; query() has no
        # separate _orderbydesc flag
        dict(city="Phoenix", _orderby="stn desc"),
        ]:
        result = stations.query(**queryargs)
        print("%s %s" % (queryargs, len(result)))
        for r in result:
            print(r)
        print("")

    # a query with no criteria returns every record
    print(list(stations.query()))
    print("")

    amfm = Table()
    amfm.create_index("stn", unique=True)
    amfm.insert(DataObject(stn="KPHY", band="AM"))
    amfm.insert(DataObject(stn="KPHX", band="FM"))
    amfm.insert(DataObject(stn="KPHA", band="FM"))
    amfm.insert(DataObject(stn="KDFW", band="FM"))

    # inserting a second record for an existing unique key must fail
    try:
        amfm.insert(DataObject(stn="KPHA", band="AM"))
    except KeyError:
        print("duplicate key not allowed")

    # join with selected and aliased attributes, ordered by the alias
    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band", "AMFM"),
                   (stations, "state", "st")]).query(_orderby="AMFM"):
        print(repr(rec))

    # same join, mixing plain names with aliased and unaliased tuples
    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn")
                )(["stn", "city", (amfm, "band"), (stations, "state", "st")]):
        print(json_dumps(rec.__dict__))

    # join copying every attribute of both tables
    print("")
    for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
        print(json_dumps(rec.__dict__))

    # one-attribute pivot
    print("")
    stations.create_index("state")
    pivot = stations.pivot("state")
    pivot.dump_counts()

    # two-attribute pivot over a join
    print("")
    amfm.create_index("band")
    pivot = (stations.join_on("stn") + amfm)().pivot("state band")
    pivot.dump_counts()
1089