1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 __doc__ = """\
32
33 C{littletable} - a Python module to give ORM-like access to a collection of objects
34
35 The C{littletable} module provides a low-overhead, schema-less, in-memory database access to a
36 collection of user objects. C{littletable} provides a L{DataObject} class for ad hoc creation
37 of semi-immutable objects that can be stored in a C{littletable} L{Table}.
38
39 In addition to basic ORM-style insert/remove/query/delete access to the contents of a
40 Table, C{littletable} offers:
41 - simple indexing for improved retrieval performance, and optional enforcing key uniqueness
42 - access to objects using indexed attributes
43 - simplified joins using '+' operator syntax between annotated Tables
44 - the result of any query or join is a new first-class C{littletable} Table
45
46 C{littletable} Tables do not require an upfront schema definition, but simply work off of the
47 attributes in the stored values, and those referenced in any query parameters.
48
49 Here is a simple C{littletable} data storage/retrieval example::
50
51 from littletable import Table, DataObject
52
53 customers = Table('customers')
54 customers.create_index("id", unique=True)
55 customers.insert(DataObject(id="0010", name="George Jetson"))
56 customers.insert(DataObject(id="0020", name="Wile E. Coyote"))
57 customers.insert(DataObject(id="0030", name="Jonny Quest"))
58
59 catalog = Table('catalog')
60 catalog.create_index("sku", unique=True)
61 catalog.insert(DataObject(sku="ANVIL-001", descr="1000lb anvil", unitofmeas="EA",unitprice=100))
62 catalog.insert(DataObject(sku="BRDSD-001", descr="Bird seed", unitofmeas="LB",unitprice=3))
63 catalog.insert(DataObject(sku="MAGNT-001", descr="Magnet", unitofmeas="EA",unitprice=8))
64 catalog.insert(DataObject(sku="MAGLS-001", descr="Magnifying glass", unitofmeas="EA",unitprice=12))
65
66 wishitems = Table('wishitems')
67 wishitems.create_index("custid")
68 wishitems.create_index("sku")
69 wishitems.insert(DataObject(custid="0020", sku="ANVIL-001"))
70 wishitems.insert(DataObject(custid="0020", sku="BRDSD-001"))
71 wishitems.insert(DataObject(custid="0020", sku="MAGNT-001"))
72 wishitems.insert(DataObject(custid="0030", sku="MAGNT-001"))
73 wishitems.insert(DataObject(custid="0030", sku="MAGLS-001"))
74
75 # print a particular customer name
76 # (unique indexes will return a single item; non-unique
77 # indexes will return a list of all matching items)
78 print customers.id["0030"].name
79
80 # print all items sold by the pound
81 for item in catalog.query(unitofmeas="LB"):
82 print item.sku, item.descr
83
84 # print all items that cost more than 10
85 for item in catalog.where(lambda o : o.unitprice>10):
86 print item.sku, item.descr, item.unitprice
87
88 # join tables to create queryable wishlists collection
89 wishlists = customers.join_on("id") + wishitems.join_on("custid") + catalog.join_on("sku")
90
91 # print all wishlist items with price > 10
92 bigticketitems = wishlists().where(lambda ob : ob.unitprice > 10)
93 for item in bigticketitems:
94 print item
95
96 # list all wishlist items in descending order by price
97 for item in wishlists().query(_orderbydesc="unitprice"):
98 print item
99 """
100
101 __version__ = "0.3"
102 __versionTime__ = "24 Oct 2010 21:00"
103 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
104
105 import sys
106 from collections import defaultdict
107 from itertools import groupby,ifilter,islice,starmap,repeat
108 import csv
109
110 try:
111 from itertools import product
112 except ImportError:
114 for a in aseq:
115 for b in bseq:
116 yield a,b
117
118 try:
119 t = basestring
120 except NameError:
121 basestring = str
122
123 __all__ = ["DataObject", "Table", "JoinTerm", "PivotTable"]
124
126 if hasattr(obj, "__dict__"):
127
128 return obj.__dict__.keys()
129 elif isinstance(obj, tuple) and hasattr(obj, "_fields"):
130
131 return obj._fields
132 elif hasattr(obj, "__slots__"):
133 return obj.__slots__
134 else:
135 raise ValueError("object with unknown attributes")
136
138 """A generic semi-mutable object for storing data values in a table. Attributes
139 can be set by passing in named arguments in the constructor, or by setting them
140 as C{object.attribute = value}. New attributes can be added any time, but updates
141 are ignored. Table joins are returned as a Table of DataObjects."""
143 if kwargs:
144 self.__dict__.update(kwargs)
146 return repr(self.__dict__)
152 if hasattr(self,k):
153 return getattr(self,k)
154 else:
155 raise KeyError("object has no such attribute " + k)
156
159 self.attr = attr
160 self.obs = defaultdict(list)
161 self.is_unique = False
163 self.obs[k].append(v)
165 return self.obs.get(k,[])
169 return iter(self.obs)
171 return sorted(self.obs.keys())
173 return self.obs.items()
175 try:
176 k = getattr(obj, self.attr)
177 self.obs[k].remove(obj)
178 except (ValueError,AttributeError,KeyError):
179 pass
181 return key in self.obs
183 return self.__class__(self.attr)
184
def __init__(self, attr, accept_none=False):
    """Set up an empty unique index on the attribute named C{attr}.
    @param attr: name of the attribute whose values are the index keys
    @type attr: string
    @param accept_none: flag stored on the index as C{accept_none}
    @type accept_none: boolean
    """
    # identity of the index
    self.attr = attr
    self.is_unique = True
    self.accept_none = accept_none
    # storage: one object per key, plus a side set for objects stored
    # without a usable key
    self.obs = {}
    self.none_values = set()
193 if k:
194 if k not in self.obs:
195 self.obs[k] = v
196 else:
197 raise KeyError("duplicate key value %s" % k)
198 else:
199 self.none_values.add(v)
201 if k:
202 return [self.obs.get(k)] if k in self.obs else []
203 else:
204 return list(self.none_values)
206 if k:
207 return k in self.obs
208 else:
209 return self.accept_none and self.none_values
211 return sorted(self.obs.keys()) + ([None,] if self.none_values else [])
213 return [(k,[v]) for k,v in self.obs.items()]
215 k = getattr(obj, self.attr)
216 if k:
217 if k in self.obs:
218 del self.obs[k]
219 else:
220 self.none_values.discard(obj)
221
226 return getattr(self._index, attr)
228 ret = Table()
229 if k in self._index:
230 ret.insert_many(self._index[k])
231 return ret
233 return k in self._index
234
239 return getattr(self._index, attr)
241 return k in self._index
243 if k:
244 return self._index[k][0]
245 else:
246 ret = Table()
247 if k in self._index:
248 ret.insert_many(self._index[k])
249 return ret
250
251
253 """Table is the main class in C{littletable}, for representing a collection of DataObjects or
254 user-defined objects with publicly accessible attributes or properties. Tables can be:
255 - created, with an optional name, using standard Python L{C{Table() constructor}<__init__>}
256 - indexed, with multiple indexes, with unique or non-unique values, see L{create_index}
257 - queried, specifying values to exact match in the desired records, see L{query}
258 - filtered (using L{where}), using a simple predicate function to match desired records;
259 useful for selecting using inequalities or compound conditions
260 - accessed directly for keyed values, using C{table.indexattribute[key]} - see L{__getattr__}
261 - joined, using L{join_on} to identify attribute to be used for joining with another table, and
262 L{join} or operator '+' to perform the actual join
263 - pivoted, using L{pivot} to create a nested structure of sub-tables grouping objects
264 by attribute values
265 - imported/exported to CSV-format files
266 Queries and joins return their results as new Table objects, so that queries and joins can
267 be easily performed as a succession of operations.
268 """
270 """Create a new, empty Table.
271 @param table_name: name for Table
272 @type table_name: string (optional)
273 """
274 self.table_name = table_name
275 self.obs = []
276 self._indexes = {}
277
279 """Return the number of objects in the Table."""
280 return len(self.obs)
281
283 """Create an iterator over the objects in the Table."""
284 return iter(self.obs)
285
287 """Provides direct indexed/sliced access to the Table's underlying list of objects."""
288 return self.obs[i]
289
291 """A quick way to query for matching records using their indexed attributes. The attribute
292 name is used to locate the index, and returns a wrapper on the index. This wrapper provides
293 dict-like access to the underlying records in the table, as in::
294
295 employees.socsecnum["000-00-0000"]
296 customers.zipcode["12345"]
297
298 The behavior differs slightly for unique and non-unique indexes:
299 - if the index is unique, then retrieving a matching object, will return just the object;
300 if there is no matching object, C{KeyError} is raised
301 - if the index is non-unique, then all matching objects will be returned in a new Table,
302 just as if a regular query had been performed; if no objects match the key value, an empty
303 Table is returned and no exception is raised.
304
305 If there is no index defined for the given attribute, then C{AttributeError} is raised.
306 """
307 if attr in self._indexes:
308 ret = self._indexes[attr]
309 if isinstance(ret, _UniqueObjIndex):
310 ret = _UniqueObjIndexWrapper(ret)
311 if isinstance(ret, _ObjIndex):
312 ret = _ObjIndexWrapper(ret)
313 return ret
314 raise AttributeError("Table '%s' has no index '%s'" %
315 (self.table_name, attr))
316
318 return bool(self.obs)
319
320 __nonzero__ = __bool__
321
323 """A simple way to assign a name to a table, such as those
324 dynamically created by joins and queries.
325 @param table_name: name for Table
326 @type table_name: string
327 """
328 self.table_name = table_name
329 return self
330
332 """Create empty copy of the current table, with copies of all
333 index definitions.
334 """
335 ret = Table(self.table_name)
336 for k,v in self._indexes.items():
337 ret._indexes[k] = v.copy_template()
338 return ret
339
341 """Create full copy of the current table, including table contents
342 and index definitions.
343 """
344 ret = self.copy_template()
345 ret.insert_many(self.obs)
346 return ret
347
def create_index(self, attr, unique=False, accept_none=False):
    """Create a new index on a given attribute.
    If C{unique} is True and records are found in the table with duplicate
    attribute values, the index is deleted and C{KeyError} is raised.

    If the table already has an index on the given attribute, then no
    action is taken and no exception is raised.

    Objects that lack the attribute are indexed under C{None}. For a
    non-unique index, objects are filed under their actual attribute
    value (including falsy values such as C{0} or C{""}), the same key
    that L{insert} uses.
    @param attr: the attribute to be used for indexed access and joins
    @type attr: string
    @param unique: flag indicating whether the indexed field values are
        expected to be unique across table entries
    @type unique: boolean
    @param accept_none: flag indicating whether None is an acceptable
        value for this attribute
    @type accept_none: boolean
    @raise KeyError: if a unique index finds a duplicate key, or a
        None/blank key when C{accept_none} is False
    """
    if attr in self._indexes:
        return

    if unique:
        ind = _UniqueObjIndex(attr, accept_none)
    else:
        ind = _ObjIndex(attr)
        # non-unique indexes always keep objects with a None/missing key
        accept_none = True
    self._indexes[attr] = ind

    try:
        for obj in self.obs:
            # use the real attribute value as the key; coercing falsy
            # values to None here would file them under a different key
            # than insert() does
            obval = getattr(obj, attr, None)
            if obval or accept_none:
                ind[obval] = obj
            else:
                raise KeyError("None is not an allowed key")
    except KeyError:
        # leave no half-built index behind
        del self._indexes[attr]
        raise
387
389 """Deletes an index from the Table. Can be used to drop and rebuild an index,
390 or to convert a non-unique index to a unique index, or vice versa.
391 @param attr: name of an indexed attribute
392 @type attr: string
393 """
394 if attr in self._indexes:
395 del self._indexes[attr]
396
398 """Insert a new object into this Table.
399 @param obj: any Python object
400 Objects can be constructed using the defined DataObject type, or they can
401 be any Python object that does not use the Python C{__slots__} feature; C{littletable}
402 introspect's the object's C{__dict__} or C{_fields} attributes to obtain join and
403 index attributes and values.
404
405 If the table contains a unique index, and the record to be inserted would add
406 a duplicate value for the indexed attribute, then C{KeyError} is raised, and the
407 object is not inserted.
408
409 If the table has no unique indexes, then it is possible to insert duplicate
410 objects into the table.
411 """
412
413
414 uniqueIndexes = [ind for ind in self._indexes.values() if ind.is_unique]
415 if any((getattr(obj, ind.attr, None) is None and not ind.accept_none)
416 or (
417 hasattr(obj, ind.attr) and getattr(obj, ind.attr) in ind
418 )
419 for ind in uniqueIndexes):
420
421 for ind in uniqueIndexes:
422 if (getattr(obj, ind.attr, None) is None and not ind.accept_none):
423 raise KeyError("unique key cannot be None or blank for index %s" % ind.attr, obj)
424 if getattr(obj, ind.attr) in ind:
425 raise KeyError("duplicate unique key value '%s' for index %s" % (getattr(obj,ind.attr), ind.attr), obj)
426
427 self.obs.append(obj)
428 for attr, ind in self._indexes.items():
429 obval = getattr(obj, attr)
430 ind[obval] = obj
431
433 """Inserts a collection of objects into the table."""
434 for ob in it:
435 self.insert(ob)
436
def remove(self, ob):
    """Removes an object from the table. If object is not in the table, then
    no action is taken and no exception is raised.
    @param ob: the object to be removed
    """
    # Drop the object from the main list first: if it is not a member,
    # return without touching the indexes, so that an unrelated object
    # sharing a key value with a stored one cannot evict the stored one.
    try:
        self.obs.remove(ob)
    except ValueError:
        return

    for ind in self._indexes.values():
        ind.remove(ob)
446
448 """Removes a collection of objects from the table."""
449 for ob in it:
450 self.remove(ob)
451
def _query_attr_sort_fn(self, attr_val):
    """Sort key used by L{query} to order its C{(attribute, value)} criteria.
    @param attr_val: an C{(attribute name, value)} pair
    @type attr_val: tuple
    @return: for an indexed attribute, the number of objects the index
        holds for the value (0 if the value is not in the index); for a
        non-indexed attribute, a large constant so that it sorts last
    """
    attr, val = attr_val
    if attr not in self._indexes:
        return 1e9

    idx = self._indexes[attr]
    if val in idx:
        return len(idx[val])
    return 0
462
def query(self, **kwargs):
    """Retrieves matching objects from the table, based on given
    named parameters. If multiple named parameters are given, then
    only objects that satisfy all of the query criteria will be returned.
    If no selection criteria are given, a full copy of the table is returned.

    Special kwargs (any keyword starting with "_" is treated as a flag,
    not as a selection criterion; unrecognized flags are ignored):
     - C{_orderby="attr,..."} - resulting table should sort content objects
       by the C{attr}s given in a comma-separated string; to sort in
       descending order, reference the attribute as C{attr desc}.

    @param **kwargs: attributes for selecting records, given as additional
        named arguments of the form C{attrname="attrvalue"}.
    @return: a new Table containing the matching objects
    """
    # separate control flags from selection criteria
    flags = [(k, v) for k, v in kwargs.items() if k.startswith("_")]
    for f, v in flags:
        del kwargs[f]

    if kwargs:
        ret = self.copy_template()
        criteria = list(kwargs.items())
        # for larger tables, apply the criteria in the order given by
        # the _query_attr_sort_fn key
        if len(criteria) > 1 and len(self.obs) > 100:
            criteria = sorted(criteria, key=self._query_attr_sort_fn)

        first = True
        for k, v in criteria:
            if first:
                # seed the result from the full table
                if k in self._indexes:
                    ret.insert_many(self._indexes[k][v])
                else:
                    ret.insert_many(r for r in self.obs
                                    if hasattr(r, k) and getattr(r, k) == v)
                first = False
            elif k in ret._indexes:
                # narrow the running result through its own index
                newret = ret.copy_template()
                newret.insert_many(ret._indexes[k][v])
                ret = newret
            else:
                # iterate over a snapshot, since ret.obs shrinks as we go
                retobs = ret.obs[:]
                ret.remove_many(o for o in retobs
                                if not hasattr(o, k)
                                or getattr(o, k) != v)
    else:
        ret = self.clone()

    for f, v in flags:
        if f == "_orderby":
            attrs = [s.strip() for s in v.split(',') if s.strip()]
            # each entry is "attr" or "attr desc"; sort on the last key
            # first so that earlier keys take precedence (sort is stable)
            attr_orders = [(a.split() + ['asc'])[:2] for a in attrs][::-1]
            for attr, order in attr_orders:
                ret.obs.sort(key=lambda ob: getattr(ob, attr),
                             reverse=(order == "desc"))

    return ret
524
526 """Deletes matching objects from the table, based on given
527 named parameters. If multiple named parameters are given, then
528 only objects that satisfy all of the query criteria will be removed.
529 @param **kwargs: attributes for selecting records, given as additional
530 named arguments of the form C{attrname="attrvalue"}.
531 @return: the number of objects removed from the table
532 """
533 if not kwargs:
534 return 0
535
536 affected = self.query(**kwargs)
537 self.remove_many(affected)
538 return len(affected)
539
def where(self, wherefn, maxrecs=0):
    """An alternative to L{query}, using a matching predicate function to
    determine whether a given object matches the query or not. You must use
    C{where} in place of C{query} if you want to query using inequalities or more
    complex matching criteria than simple C{attribute=value}.
    @param wherefn: a method or lambda that returns a boolean result, as in::

        lambda ob : ob.unitprice > 10

    @type wherefn: callable(object) returning boolean
    @param maxrecs: if only the first 'n' records are needed, then C{where} will
        stop after locating 'n' matching records; 0 (the default) means no limit
    @type maxrecs: int
    @returns: a new Table containing the matching records
    """
    if wherefn is None:
        # keep the filter-style meaning of a None predicate: select truthy objects
        wherefn = bool

    # a generator expression filters lazily without needing itertools.ifilter
    matches = (ob for ob in self.obs if wherefn(ob))
    if maxrecs:
        matches = islice(matches, 0, maxrecs)

    ret = self.copy_template()
    ret.insert_many(matches)
    return ret
561
def join(self, other, attrlist=None, **kwargs):
    """
    Join the objects of one table with the objects of another, based on the given
    matching attributes in the named arguments.  The attrlist specifies the attributes to
    be copied from the source tables - if omitted, all attributes will be copied.  Entries
    in the attrlist may be single attribute names, or if there are duplicate names in both
    tables, then a C{(table,attributename)} tuple can be given to disambiguate which
    attribute is desired. A C{(table,attributename,alias)} tuple can also be passed, to
    rename an attribute from a source table.

    This method may be called directly, or can be constructed using the L{join_on} method and
    the '+' operator.  Using this syntax, the join is specified using C{table.join_on("xyz")}
    to create a JoinTerm containing both table and joining attribute.  Multiple JoinTerm
    or tables can be added to construct a compound join expression.  When complete, the
    join expression gets executed by calling the resulting join definition,
    using C{join_expression([attrlist])}.

    If either table is empty, an empty Table is returned before the join
    attributes are checked for indexes.

    @param other: other table to join to
    @param attrlist: list of attributes to be copied to the new joined table; if
        none provided, all attributes of both tables will be used (taken from the first
        object in each table); names found in neither table are skipped
    @type attrlist: string, or list of strings or C{(table,attribute[,alias])} tuples
        (list may contain both strings and tuples)
    @param **kwargs: attributes to join on, given as an additional named argument
        of the form C{table1attr="table2attr"}; only a single pair is used
    @returns: a new Table containing the joined data as new DataObjects
    @raise ValueError: if either join attribute is not indexed in its table
    """
    # list() so this also works where items() returns a view; an empty
    # kwargs still raises IndexError
    thiscol, othercol = list(kwargs.items())[0]

    retname = ("(%s:%s^%s:%s)" %
               (self.table_name, thiscol, other.table_name, othercol))

    # nothing to join
    if not (self.obs and other.obs):
        return Table(retname)

    if isinstance(attrlist, basestring):
        attrlist = attrlist.split()

    # expand attrlist into a list of (table, attribute, alias) triples;
    # available names are sampled from the first object of each table
    thisnames = set(_object_attrnames(self.obs[0]))
    othernames = set(_object_attrnames(other.obs[0]))
    fullcols = []
    if attrlist is not None:
        for col in attrlist:
            if isinstance(col, tuple):
                # (table, attr) becomes (table, attr, attr); a 3-tuple
                # is kept as given
                fullcols.append((col + (col[1],))[:3])
            elif col in thisnames:
                fullcols.append((self, col, col))
            elif col in othernames:
                fullcols.append((other, col, col))
            # a name found in neither table is skipped
    else:
        fullcols = [(self, n, n) for n in thisnames]
        fullcols += [(other, n, n) for n in othernames]

    thiscols = [c for c in fullcols if c[0] is self]
    othercols = [c for c in fullcols if c[0] is other]

    # test for a missing index explicitly, not via the index's truth value
    thiscolindex = self._indexes.get(thiscol)
    othercolindex = other._indexes.get(othercol)
    if thiscolindex is None or othercolindex is None:
        raise ValueError("can only join on indexed attributes")

    # walk the keys of the smaller index and probe the larger one
    if len(thiscolindex) < len(othercolindex):
        shortindex, longindex = (thiscolindex, othercolindex)
        swap = False
    else:
        shortindex, longindex = (othercolindex, thiscolindex)
        swap = True

    # collect (rows from self, rows from other) pairs sharing a key
    matchingrows = []
    for key, rows in shortindex.items():
        if key in longindex:
            if swap:
                matchingrows.append((longindex[key], rows))
            else:
                matchingrows.append((rows, longindex[key]))

    # build one DataObject per combination of matching rows
    joinrows = []
    for thisrows, otherrows in matchingrows:
        for trow, orow in product(thisrows, otherrows):
            retobj = DataObject()
            for _, c, a in thiscols:
                setattr(retobj, a, getattr(trow, c))
            for _, c, a in othercols:
                setattr(retobj, a, getattr(orow, c))
            joinrows.append(retobj)

    # carry indexes over to the result (as non-unique indexes, under
    # the alias name) before loading the joined rows
    ret = Table(retname)
    for tbl, collist in zip([self, other], [thiscols, othercols]):
        for _, c, a in collist:
            if c in tbl._indexes:
                ret.create_index(a)
    ret.insert_many(joinrows)
    return ret
666
668 """Creates a JoinTerm in preparation for joining with another table, to
669 indicate what attribute should be used in the join. Only indexed attributes
670 may be used in a join.
671 @param attr: attribute name to join from this table (may be different
672 from the attribute name in the table being joined to)
673 @type attr: string
674 @returns: L{JoinTerm}"""
675 if attr not in self._indexes:
676 raise ValueError("can only join on indexed attributes")
677 return JoinTerm(self, attr)
678
def pivot(self, attrlist):
    """Build a L{PivotTable} over this table using the given attributes.
    @param attrlist: attributes to pivot on, each of which must be indexed
    @type attrlist: list of strings, or string of space-delimited attribute names
    @raise ValueError: if any of the attributes is not indexed
    """
    if isinstance(attrlist, basestring):
        attrlist = attrlist.split()

    # guard clause: every pivot attribute needs an index
    for name in attrlist:
        if name not in self._indexes:
            raise ValueError("pivot can only be called using indexed attributes")

    return PivotTable(self, [], attrlist)
690
692 """Imports the contents of a CSV-formatted file into this table.
693 @param csv_source: CSV file - if a string is given, the file with that name will be
694 opened, read, and closed; if a file object is given, then that object
695 will be read as-is, and left for the caller to be closed.
696 @type csv_source: string or file
697 """
698 close_on_exit = False
699 if isinstance(csv_source, basestring):
700 csv_source = open(csv_source)
701 close_on_exit = True
702 try:
703 csvdata = csv.DictReader(csv_source)
704 self.insert_many(DataObject(**s) for s in csvdata)
705 finally:
706 if close_on_exit:
707 csv_source.close()
708
710 """Exports the contents of the table to a CSV-formatted file.
711 @param csv_dest: CSV file - if a string is given, the file with that name will be
712 opened, written, and closed; if a file object is given, then that object
713 will be written as-is, and left for the caller to be closed.
714 @type csv_dest: string or file
715 @param fieldnames: attribute names to be exported; can be given as a single
716 string with space-delimited names, or as a list of attribute names
717 """
718 close_on_exit = False
719 if isinstance(csv_dest, basestring):
720 csv_dest = open(csv_dest,'wb')
721 close_on_exit = True
722 try:
723 if fieldnames is None:
724 fieldnames = list(_object_attrnames(self.obs[0]))
725 if isinstance(fieldnames, basestring):
726 fieldnames = fieldnames.split()
727
728 csv_dest.write(','.join(fieldnames) + '\n')
729 csvout = csv.DictWriter(csv_dest, fieldnames, extrasaction='ignore')
730 if hasattr(self.obs[0], "__dict__"):
731 for o in self.obs:
732 csvout.writerow(o.__dict__)
733 else:
734 for o in self.obs:
735 row = dict(starmap(lambda obj, fld: (fld, getattr(obj, fld)),
736 zip(repeat(o), fieldnames)))
737 csvout.writerow(row)
738 finally:
739 if close_on_exit:
740 csv_dest.close()
741
742
744 """Enhanced Table containing pivot results from calling table.pivot().
745 """
def __init__(self, parent, attr_val_path, attrlist):
    """PivotTable initializer - do not create these directly, use
    L{Table.pivot}.
    @param parent: table (or enclosing PivotTable) supplying the objects
    @param attr_val_path: list of C{(attribute, value)} pairs selecting
        this table's contents; empty for the top-level pivot
    @param attrlist: pivot attributes still to be expanded into subtables
    """
    super(PivotTable,self).__init__()
    self._attr_path = attr_val_path[:]
    self._pivot_attrs = attrlist[:]
    self._subtable_dict = {}

    # mirror the parent's index definitions before loading any objects
    for name, parent_index in parent._indexes.items():
        self._indexes[name] = parent_index.copy_template()

    if attr_val_path:
        # nested level: keep only the parent's objects matching the last
        # (attribute, value) pair, and register with the parent by value
        attr, val = attr_val_path[-1]
        self.insert_many(parent.query(**{attr: val}))
        parent._subtable_dict[val] = self
    else:
        self.insert_many(parent.obs)

    # expand the next pivot attribute, one subtable per key in the
    # parent's index on that attribute
    children = []
    if len(attrlist) > 0:
        this_attr = attrlist[0]
        sub_attrlist = attrlist[1:]
        ind = parent._indexes[this_attr]
        for k in sorted(ind.keys()):
            children.append(PivotTable(self,
                                       attr_val_path + [(this_attr, k)],
                                       sub_attrlist))
    self.subtables = children
773
775 if self._subtable_dict:
776 return self._subtable_dict[val]
777 else:
778 return super(PivotTable,self).__getitem__(val)
779
781 return sorted(self._subtable_dict.keys())
782
784 return sorted(self._subtable_dict.items())
785
787 return self._subtable_dict.values()
788
790 """Return the set of attribute-value pairs that define the contents of this
791 table within the original source table.
792 """
793 return self._attr_path
794
796 """Return the pivot_key as a displayable string.
797 """
798 return '/'.join("%s:%s" % (attr,key) for attr,key in self._attr_path)
799
801 """Return whether this table has further subtables.
802 """
803 return bool(self.subtables)
804
def dump(self, out=sys.stdout, row_fn=repr, maxrecs=-1, indent=0):
    """Write a nested listing of this pivot table to C{out}.
    @param out: output stream to write to
    @param row_fn: function to call to display individual rows
    @param maxrecs: number of records to show at deepest level of pivot (-1=show all)
    @param indent: current nesting level
    """
    NL = '\n'

    # heading: pivot attributes at the top level, pivot key when nested
    if not indent:
        heading = "Pivot: %s" % ','.join(self._pivot_attrs)
    else:
        heading = " "*indent + self.pivot_key_str()
    out.write(heading)
    out.write(NL)

    if not self.has_subtables():
        # deepest level: list the rows, limited to maxrecs if non-negative
        shown = self.obs[slice(0, maxrecs)] if maxrecs >= 0 else self.obs[slice(None, None)]
        prefix = " "*(indent+1)
        for row in shown:
            out.write(prefix + row_fn(row) + NL)
    else:
        # recurse into subtables that are truthy
        for child in self.subtables:
            if child:
                child.dump(out, row_fn, maxrecs, indent+1)

    out.flush()
830
832 """Dump out the summary counts of entries in this pivot table as a tabular listing.
833 @param out: output stream to write to
834 """
835 if len(self._pivot_attrs) == 1:
836 out.write("Pivot Summary: %s\n" % ','.join(self._pivot_attrs))
837 maxkeylen = max(len(str(k)) for k in self.keys())
838 for sub in self.subtables:
839 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1]))
840 out.write("%7d\n" % len(sub))
841 elif len(self._pivot_attrs) == 2:
842 out.write("Pivot Summary: %s\n" % ','.join(self._pivot_attrs))
843 maxkeylen = max(max(len(str(k)) for k in self.keys()),5)
844 maxvallen = max(max(len(str(k)) for k in self.subtables[0].keys()),7)
845 keytally = dict((k,0) for k in self.subtables[0].keys())
846 out.write("%*s " % (maxkeylen,''))
847 out.write(' '.join("%*.*s" % (maxvallen,maxvallen,k) for k in self.subtables[0].keys()))
848 out.write(' Total\n')
849 for sub in self.subtables:
850 out.write("%-*.*s " % (maxkeylen,maxkeylen,sub._attr_path[-1][1]))
851 for ssub in sub.subtables:
852 out.write("%*d " % (maxvallen,len(ssub)))
853 keytally[ssub._attr_path[-1][1]] += len(ssub)
854 out.write("%7d\n" % len(sub))
855 out.write('%-*.*s ' % (maxkeylen,maxkeylen,"Total"))
856 out.write(' '.join("%*d" % (maxvallen,tally) for k,tally in sorted(keytally.items())))
857 out.write(" %7d\n" % sum(tally for k,tally in keytally.items()))
858 else:
859 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
860
862 """Dump out the summary counts of this pivot table as a Table.
863 """
864 ret = Table()
865 topattr = self._pivot_attrs[0]
866 for attr in self._pivot_attrs:
867 ret.create_index(attr)
868 if len(self._pivot_attrs) == 1:
869 for sub in self.subtables:
870 subattr,subval = sub._attr_path[-1]
871 ret.insert(DataObject(**{subattr:subval, 'Count':len(sub)}))
872 elif len(self._pivot_attrs) == 2:
873 for sub in self.subtables:
874 for ssub in sub.subtables:
875 attrdict = dict(ssub._attr_path)
876 attrdict['Count'] = len(ssub)
877 ret.insert(DataObject(**attrdict))
878 elif len(self._pivot_attrs) == 3:
879 for sub in self.subtables:
880 for ssub in sub.subtables:
881 for sssub in ssub.subtables:
882 attrdict = dict(sssub._attr_path)
883 attrdict['Count'] = len(sssub)
884 ret.insert(DataObject(**attrdict))
885 else:
886 raise ValueError("can only dump summary counts for 1 or 2-attribute pivots")
887 return ret
888
890 """Temporary object created while composing a join across tables using
891 L{Table.join_on} and '+' addition. JoinTerm's are usually created by
892 calling join_on on a Table object, as in::
893
894 customers.join_on("id") + orders.join_on("custid")
895
896 This join expression would set up the join relationship
897 equivalent to::
898
899 customers.join(orders, id="custid")
900
901 If tables are being joined on attributes that have the same name in
902 both tables, then a join expression could be created by adding a
903 JoinTerm of one table directly to the other table::
904
905 customers.join_on("custid") + orders
906
907 Once the join expression is composed, the actual join is performed
908 using function call notation::
909
910 customerorders = customers.join_on("custid") + orders
911 for custord in customerorders():
912 print custord
913
914 When calling the join expression, you can optionally specify a
915 list of attributes as defined in L{Table.join}.
916 """
def __init__(self, sourceTable, joinfield):
    """Record the table and the attribute it is to be joined on.
    @param sourceTable: table taking part in the join
    @param joinfield: name of the attribute to join on
    @type joinfield: string
    """
    # no join partner until one is attached later
    self.jointo = None
    self.joinfield = joinfield
    self.sourcetable = sourceTable
921
923 if isinstance(other, Table):
924 other = other.join_on(self.joinfield)
925 if isinstance(other, JoinTerm):
926 if self.jointo is None:
927 if other.jointo is None:
928 self.jointo = other
929 else:
930 self.jointo = other()
931 return self
932 else:
933 if other.jointo is None:
934 return self() + other
935 else:
936 return self() + other()
937 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
938
940 if isinstance(other, Table):
941 return other.join_on(self.joinfield) + self
942 raise ValueError("cannot add object of type '%s' to JoinTerm" % other.__class__.__name__)
943
945 if self.jointo:
946 other = self.jointo
947 if isinstance(other, Table):
948 other = other.join_on(self.joinfield)
949 ret = self.sourcetable.join(other.sourcetable, attrs,
950 **{self.joinfield : other.joinfield})
951 return ret
952 else:
953 return self.sourcetable.query()
954
957
958
959 if __name__ == "__main__":
960
961
962 from functools import partial
963 try:
964 import simplejson as json
965 json_dumps = partial(json.dumps, indent=' ')
966 except ImportError:
967 import json
968 json_dumps = partial(json.dumps, indent=2)
969
970
971 rawdata = """\
972 Phoenix:AZ:85001:KPHX
973 Phoenix:AZ:85001:KPHY
974 Phoenix:AZ:85001:KPHA
975 Dallas:TX:75201:KDFW""".splitlines()
976
977
978 stations = Table()
979
980 stations.create_index("stn", unique=True)
981
982 fields = "city state zip stn".split()
983 for d in rawdata:
984 ob = DataObject()
985 for k,v in zip(fields, d.split(':')):
986 setattr(ob,k,v.strip())
987 stations.insert(ob)
988
989
990 for queryargs in [
991 dict(city="Phoenix"),
992 dict(city="Phoenix", stn="KPHX"),
993 dict(stn="KPHA", city="Phoenix"),
994 dict(state="TX"),
995 dict(city="New York"),
996 dict(city="Phoenix", _orderby="stn"),
997 dict(city="Phoenix", _orderbydesc="stn"),
998 ]:
999 print queryargs,
1000 result = stations.query(**queryargs)
1001 print len(result)
1002 for r in result: print r
1003 print
1004
1005
1006 print list(stations.query())
1007 print
1008
1009 amfm = Table()
1010 amfm.create_index("stn", unique=True)
1011 amfm.insert(DataObject(stn="KPHY", band="AM"))
1012 amfm.insert(DataObject(stn="KPHX", band="FM"))
1013 amfm.insert(DataObject(stn="KPHA", band="FM"))
1014 amfm.insert(DataObject(stn="KDFW", band="FM"))
1015
1016 try:
1017 amfm.insert(DataObject(stn="KPHA", band="AM"))
1018 except KeyError:
1019 print "duplicate key not allowed"
1020
1021 print
1022 for rec in (stations.join_on("stn") + amfm.join_on("stn")
1023 )(["stn", "city", (amfm,"band","AMFM"),
1024 (stations,"state","st")]).query(_orderby="AMFM"):
1025 print repr(rec)
1026
1027 print
1028 for rec in (stations.join_on("stn") + amfm.join_on("stn")
1029 )(["stn", "city", (amfm,"band"), (stations,"state","st")]):
1030 print json_dumps(rec.__dict__)
1031
1032 print
1033 for rec in (stations.join_on("stn") + amfm.join_on("stn"))():
1034 print json_dumps(rec.__dict__)
1035
1036 print
1037 stations.create_index("state")
1038 pivot = stations.pivot("state")
1039 pivot.dump_counts()
1040
1041 print
1042 amfm.create_index("band")
1043 pivot = (stations.join_on("stn") + amfm)().pivot("state band")
1044 pivot.dump_counts()
1045