Package ete2 :: Package clustering :: Module pstat
[hide private]
[frames] | no frames]

Source Code for Module ete2.clustering.pstat

   1  # #START_LICENSE########################################################### 
   2  # 
   3  # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.   
   4  # email: jhcepas@gmail.com 
   5  # 
   6  # This file is part of the Environment for Tree Exploration program (ETE).  
   7  # http://ete.cgenomics.org 
   8  #   
   9  # ETE is free software: you can redistribute it and/or modify it 
  10  # under the terms of the GNU General Public License as published by 
  11  # the Free Software Foundation, either version 3 of the License, or 
  12  # (at your option) any later version. 
  13  #   
  14  # ETE is distributed in the hope that it will be useful, 
  15  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  16  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  17  # GNU General Public License for more details. 
  18  #   
  19  # You should have received a copy of the GNU General Public License 
  20  # along with ETE.  If not, see <http://www.gnu.org/licenses/>. 
  21  # 
  22  # #END_LICENSE############################################################# 
  23  __VERSION__="ete2-2.0rev104"  
  24  # Copyright (c) 1999-2007 Gary Strangman; All Rights Reserved.
 
  25  #
 
  26  # Permission is hereby granted, free of charge, to any person obtaining a copy
 
  27  # of this software and associated documentation files (the "Software"), to deal
 
  28  # in the Software without restriction, including without limitation the rights
 
  29  # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 
  30  # copies of the Software, and to permit persons to whom the Software is
 
  31  # furnished to do so, subject to the following conditions:
 
  32  # 
 
  33  # The above copyright notice and this permission notice shall be included in
 
  34  # all copies or substantial portions of the Software.
 
  35  # 
 
  36  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 
  37  # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 
  38  # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 
  39  # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 
  40  # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 
  41  # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 
  42  # THE SOFTWARE.
 
  43  #
 
  44  # Comments and/or additions are welcome (send e-mail to:
 
  45  # strang@nmr.mgh.harvard.edu).
 
  46  # 
 
  47  """
 
  48  pstat.py module
 
  49  
 
  50  #################################################
 
  51  #######  Written by:  Gary Strangman  ###########
 
  52  #######  Last modified:  Dec 18, 2007 ###########
 
  53  #################################################
 
  54  
 
  55  This module provides some useful list and array manipulation routines
 
  56  modeled after those found in the |Stat package by Gary Perlman, plus a
 
  57  number of other useful list/file manipulation functions.  The list-based
 
  58  functions include:
 
  59  
 
  60        abut (source,*args)
 
  61        simpleabut (source, addon)
 
  62        colex (listoflists,cnums)
 
  63        collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  64        dm (listoflists,criterion)
 
  65        flat (l)
 
  66        linexand (listoflists,columnlist,valuelist)
 
  67        linexor (listoflists,columnlist,valuelist)
 
  68        linedelimited (inlist,delimiter)
 
  69        lineincols (inlist,colsize) 
 
  70        lineincustcols (inlist,colsizes)
 
  71        list2string (inlist)
 
  72        makelol(inlist)
 
  73        makestr(x)
 
  74        printcc (lst,extra=2)
 
  75        printincols (listoflists,colsize)
 
  76        pl (listoflists)
 
  77        printl(listoflists)
 
  78        replace (lst,oldval,newval)
 
  79        recode (inlist,listmap,cols=None)
 
  80        remap (listoflists,criterion)
 
  81        roundlist (inlist,num_digits_to_round_floats_to)
 
  82        sortby(listoflists,sortcols)
 
  83        unique (inlist)
 
  84        duplicates(inlist)
 
  85        writedelimited (listoflists, delimiter, file, writetype='w')
 
  86  
 
  87  Some of these functions have alternate versions which are defined only if
 
  88  Numeric (NumPy) can be imported.  These functions are generally named as
 
  89  above, with an 'a' prefix.
 
  90  
 
  91        aabut (source, *args)
 
  92        acolex (a,indices,axis=1)
 
  93        acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
 
  94        adm (a,criterion)
 
  95        alinexand (a,columnlist,valuelist)
 
  96        alinexor (a,columnlist,valuelist)
 
  97        areplace (a,oldval,newval)
 
  98        arecode (a,listmap,col='all')
 
  99        arowcompare (row1, row2)
 
 100        arowsame (row1, row2)
 
 101        asortrows(a,axis=0)
 
 102        aunique(inarray)
 
 103        aduplicates(inarray)
 
 104  
 
 105  Currently, the code is all but completely un-optimized.  In many cases, the
 
 106  array versions of functions amount simply to aliases to built-in array
 
 107  functions/methods.  Their inclusion here is for function name consistency.
 
 108  """ 
 109  
 
 110  ## CHANGE LOG:
 
 111  ## ==========
 
 112  ## 07-11-26 ... edited to work with numpy
 
 113  ## 01-11-15 ... changed list2string() to accept a delimiter
 
 114  ## 01-06-29 ... converted exec()'s to eval()'s to make compatible with Py2.1
 
 115  ## 01-05-31 ... added duplicates() and aduplicates() functions
 
 116  ## 00-12-28 ... license made GPL, docstring and import requirements
 
 117  ## 99-11-01 ... changed version to 0.3
 
 118  ## 99-08-30 ... removed get, getstrings, put, aget, aput (into io.py)
 
 119  ## 03/27/99 ... added areplace function, made replace fcn recursive
 
 120  ## 12/31/98 ... added writefc function for output to fixed column sizes
 
 121  ## 12/07/98 ... fixed import problem (failed on collapse() fcn)
 
 122  ##              added __version__ variable (now 0.2)
 
 123  ## 12/05/98 ... updated doc-strings
 
 124  ##              added features to collapse() function
 
 125  ##              added flat() function for lists
 
 126  ##              fixed a broken asortrows() 
 
 127  ## 11/16/98 ... fixed minor bug in aput for 1D arrays
 
 128  ##
 
 129  ## 11/08/98 ... fixed aput to output large arrays correctly
 
 130  
 
 131  import stats  # required 3rd party module 
 132  import string, copy 
 133  from types import * 
 134  
 
 135  __version__ = 0.4 
 136  
 
 137  ###===========================  LIST FUNCTIONS  ==========================
 
 138  ###
 
 139  ### Here are the list functions, DEFINED FOR ALL SYSTEMS.
 
 140  ### Array functions (for NumPy-enabled computers) appear below.
 
 141  ###
 
 142  
 
def abut(source, *args):
    """
    Like the |Stat abut command.  It concatenates lists side-by-side and
    returns the result.  '2D' lists are also accommodated for any argument,
    and a non-list/tuple argument is treated as a one-item list.  CAUTION:
    whenever the two sides differ in length, the shorter one is repeated (and
    trimmed if needed) until it is as long as the longer one.  If this
    behavior is not desired, use pstat.simpleabut().

    Usage:   abut(source, args)   where args=any # of lists
    Returns: a list of lists as long as the LONGEST list passed, source on the
             'left', lists in <args> attached consecutively on the 'right'
    Raises:  ZeroDivisionError if exactly one side of a pairing is empty
    """
    def cycle_to(seq, length):
        # Repeat seq until it holds at least `length` items, then trim the
        # overshoot.  The repeats are deep copies so rows are not aliased.
        repeats = -(-length // len(seq))     # ceiling division, stays an int
        original = copy.deepcopy(seq)
        for _ in range(repeats - 1):
            seq = seq + original
        return seq[0:length]

    if type(source) not in (list, tuple):
        source = [source]
    for addon in args:
        if type(addon) not in (list, tuple):
            addon = [addon]
        if len(addon) < len(source):         # source is longer: stretch addon
            addon = cycle_to(addon, len(source))
        elif len(source) < len(addon):       # addon is longer: stretch source
            source = cycle_to(source, len(addon))
        source = simpleabut(source, addon)
    return source
188 189
def simpleabut(source, addon):
    """
    Concatenates two lists as columns and returns the result.  '2D' lists
    are also accommodated for either argument (source or addon).  This DOES
    NOT repeat either list to make the 2 lists of equal length.  Beware of
    list pairs with different lengths ... the resulting list will be the
    length of the FIRST list passed; rows of source beyond the end of addon
    are returned unchanged.  Whether an argument is '2D' is decided from its
    first element only.

    Usage:   simpleabut(source,addon)  where source, addon=list (or list-of-lists)
    Returns: a new list as long as source, with source on the 'left' and
             addon on the 'right'
    """
    if type(source) not in (list, tuple):
        source = [source]
    if type(addon) not in (list, tuple):
        addon = [addon]
    # Always build the result as a list: a tuple source passes the check
    # above but cannot be assigned into.
    rows = list(copy.deepcopy(source))
    if len(source) == 0 or len(addon) == 0:
        return rows                          # nothing to pair up
    source_is_2d = type(source[0]) in (list, tuple)
    addon_is_2d = type(addon[0]) in (list, tuple)
    for i in range(min(len(source), len(addon))):
        # list(...) lets tuple rows be concatenated with list rows
        if source_is_2d:
            left = list(source[i])
        else:
            left = [source[i]]
        if addon_is_2d:
            right = list(addon[i])
        else:
            right = [addon[i]]
        rows[i] = left + right
    return rows
224 225
def colex(listoflists, cnums):
    """
    Extracts from listoflists the columns specified by cnums.  cnums can be
    an integer, a sequence of integers, or a string holding a subscript that
    is applied to each row ... e.g., '[3:]' will colex columns 3 onward from
    the listoflists.  (The string is appended to the row variable, so it must
    start with the bracket, not with a variable name.)

    Usage:   colex (listoflists,cnums)
    Returns: for an integer (or a one-item sequence) a flat list of that
             column's values; for several integers a list-of-lists with the
             columns in the order given in cnums; for a string the list of
             subscript results
    """
    def column_at(position):
        return [row[position] for row in listoflists]

    if type(cnums) in (list, tuple):         # multiple columns to get
        column = column_at(cnums[0])
        for position in cnums[1:]:
            column = abut(column, column_at(position))
    elif type(cnums) == str:                 # a '[3:]' type expression
        # NOTE(review): the subscript text goes through eval(); never pass
        # untrusted input here.
        getter = eval('lambda x: x' + cnums)
        column = [getter(row) for row in listoflists]
    else:                                    # just 1 column to get
        column = column_at(cnums)
    return column
252 253
def collapse(listoflists, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values).  Setting fcn1
    and/or fcn2 to a function rather than None (e.g., stats.sterr, len)
    appends those results after each calculated mean; if such a function
    raises, 'N/A' is appended instead.  cfcn is the collapse function to
    apply (defaults to the arithmetic mean, defined locally; harmonicmean or
    others could be passed).

    Usage:   collapse (listoflists,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: with keepcols empty, a list holding one entry per collapse
             column (the cfcn result, or a list [cfcn result, fcn1 result,
             fcn2 result] when extra functions are given); otherwise a list
             of lists with every unique combination of the keepcols values,
             each followed by the cfcn (and fcn1/fcn2) results for each
             column in collapsecols.
    """
    def collmean(inlist):
        total = 0
        for item in inlist:
            total = total + item
        return total / float(len(inlist))

    def best_effort(fcn, column):
        # Extra statistics are optional: a failing function yields 'N/A'
        # rather than aborting the whole collapse.
        try:
            return fcn(column)
        except Exception:
            return 'N/A'

    if type(keepcols) not in (list, tuple):
        keepcols = [keepcols]
    if type(collapsecols) not in (list, tuple):
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = collmean

    if len(keepcols) == 0:
        means = []
        for col in collapsecols:
            avgcol = colex(listoflists, col)
            entry = cfcn(avgcol)
            if fcn1 is not None:
                entry = [entry, best_effort(fcn1, avgcol)]
            if fcn2 is not None:
                extra = best_effort(fcn2, avgcol)
                try:
                    entry = entry + [extra]      # entry is already a list
                except TypeError:
                    entry = [entry, extra]       # entry was a bare value
            means.append(entry)
        return means

    values = colex(listoflists, keepcols)
    uniques = unique(values)
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) not in (list, tuple):
            item = [item]
        tmprows = linexand(listoflists, keepcols, item)
        row = list(item)
        for col in collapsecols:
            avgcol = colex(tmprows, col)
            row.append(cfcn(avgcol))
            if fcn1 is not None:
                row.append(best_effort(fcn1, avgcol))
            if fcn2 is not None:
                row.append(best_effort(fcn2, avgcol))
        newlist.append(row)
    return newlist
def dm(listoflists, criterion):
    """
    Returns rows from the passed list of lists that meet the criteria in
    the passed criterion expression (a string as a function of x; e.g.,
    'x[3]>=9' will return all rows where the 4th column>=9 and "x[2]=='N'"
    will return rows with column 2 equal to the string 'N').

    Usage:   dm (listoflists, criterion)
    Returns: a list of the rows from listoflists that meet the criterion.
    """
    # NOTE(review): criterion is turned into code with eval(); it must never
    # come from untrusted input.
    keep = eval('lambda x: ' + criterion)
    return [row for row in listoflists if keep(row)]
345 346
def flat(l):
    """
    Returns the flattened version of a '2D' list, i.e. the items of every
    row strung together in order.  List-correlate to the a.ravel() method of
    NumPy arrays.

    Usage:   flat(l)
    """
    return [item for row in l for item in row]
359 360
def linexand(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for EVERY pair of values (columnlist[i],valuelist[i]).
    len(columnlist) must not exceed len(valuelist).  Values are compared
    directly with ==, so strings containing quotes and floats are matched
    exactly.

    Usage:   linexand (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ALL i
    """
    if type(columnlist) not in (list, tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple):
        valuelist = [valuelist]
    # Indexing valuelist (rather than zipping) keeps the IndexError when
    # there are fewer values than columns.
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if not row[col] == val:
                break
        else:                                # no pair failed
            lines.append(row)
    return lines
385 386
def linexor(listoflists, columnlist, valuelist):
    """
    Returns the rows of a list of lists where col (from columnlist) = val
    (from valuelist) for ANY pair of values (columnlist[i],valuelist[i]).
    One value is required for each column in columnlist.  If only one column
    is given but several values, the values are all assumed to pertain to
    that column; likewise a single value is tested against every column
    given.  Values are compared directly with ==.

    Usage:   linexor (listoflists,columnlist,valuelist)
    Returns: the rows of listoflists where columnlist[i]=valuelist[i] for ANY i
    """
    if type(columnlist) not in (list, tuple):
        columnlist = [columnlist]
    if type(valuelist) not in (list, tuple):
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = columnlist * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = valuelist * len(columnlist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    lines = []
    for row in listoflists:
        for col, val in pairs:
            if row[col] == val:              # first hit is enough
                lines.append(row)
                break
    return lines
415 416
def linedelimited(inlist, delimiter):
    """
    Returns a string composed of the elements in inlist, with each element
    separated by 'delimiter'.  Non-string elements are converted with str().
    Used by function writedelimited.  Pass a tab character for
    tab-delimiting.

    Usage:   linedelimited (inlist,delimiter)
    """
    pieces = []
    for item in inlist:
        if type(item) != str:
            item = str(item)
        pieces.append(item)
    # join() puts the delimiter only BETWEEN items, whatever its length
    return delimiter.join(pieces)
432 433
def lineincols(inlist, colsize):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in columns of (fixed) colsize.  Elements wider than colsize
    are cut down to their first colsize characters so columns stay aligned.

    Usage:   lineincols (inlist,colsize)   where colsize is an integer
    """
    cells = []
    for item in inlist:
        if type(item) != str:
            item = str(item)
        if len(item) <= colsize:
            cells.append(item.rjust(colsize))
        else:
            cells.append(item[0:colsize])    # exactly colsize characters
    return ''.join(cells)
453 454
def lineincustcols(inlist, colsizes):
    """
    Returns a string composed of the elements in inlist, with each element
    right-aligned in a column of the width given at the same position in the
    sequence colsizes.  Elements wider than their column are cut down to the
    column width.  The length of colsizes must be greater than or equal to
    the number of columns in inlist (an IndexError results otherwise).

    Usage:   lineincustcols (inlist,colsizes)
    Returns: formatted string created from inlist
    """
    cells = []
    for i in range(len(inlist)):
        item = inlist[i]
        if type(item) != str:
            item = str(item)
        width = colsizes[i]
        if len(item) <= width:
            cells.append(item.rjust(width))
        else:
            cells.append(item[0:width])      # exactly width characters
    return ''.join(cells)
479 480
def list2string(inlist, delimit=' '):
    """
    Converts a 1D list to a single long string for file output: every
    element that is not already a string is passed through str(), and the
    pieces are joined with delimit.

    Usage:   list2string (inlist,delimit=' ')
    Returns: the string created from inlist
    """
    pieces = []
    for element in inlist:
        if type(element) != str:
            element = str(element)
        pieces.append(element)
    return delimit.join(pieces)
491 492
def makelol(inlist):
    """
    Converts a 1D list to a 2D list (i.e., a list-of-lists) by wrapping
    every item in its own one-element list.  Useful when you want to write
    a 1D list one item per line.

    Usage:   makelol(inlist)
    Returns: if l = [1,2,'hi'] then returns [[1],[2],['hi']] etc.
    """
    return [[item] for item in inlist]
505 506
def makestr(x):
    """
    Returns x unchanged when it is exactly a str, otherwise str(x).

    Usage:   makestr(x)
    """
    if type(x) == str:
        return x
    return str(x)
511 512
def printcc(lst, extra=2):
    """
    Prints a list of lists in columns, customized by the max size of items
    within the columns (max size of items in col, plus 'extra' number of
    spaces).  Use 'dashes' or '\\n' in the list-of-lists to print dashes or
    blank lines, respectively; such marker rows are ignored when the column
    widths are worked out.

    Usage:   printcc (lst,extra=2)
    Returns: None
    """
    blank_markers = (['\n'], '\n', '', [''])
    dash_markers = (['dashes'], 'dashes')

    if type(lst[0]) not in (list, tuple):
        lst = [lst]
    # Only real data rows take part in sizing the columns.
    data_rows = []
    for row in lst:
        if row not in blank_markers and row not in dash_markers:
            data_rows.append(row)
    maxsize = []
    if data_rows:
        for col in range(len(data_rows[0])):
            items = [makestr(item) for item in colex(data_rows, col)]
            maxsize.append(max([len(item) for item in items]) + extra)
    for row in lst:
        if row in blank_markers:
            print('')
        elif row in dash_markers:
            # underline the text part of each column, leaving the padding
            dashes = ['-' * (width - extra) for width in maxsize]
            print(lineincustcols(dashes, maxsize))
        else:
            print(lineincustcols(row, maxsize))
    return None
549 550
def printincols(listoflists, colsize):
    """
    Prints a list of lists, one row per output line, with every item
    formatted by lineincols into columns of (fixed) width colsize, where
    colsize is an integer.

    Usage:   printincols (listoflists,colsize)
    Returns: None
    """
    for record in listoflists:
        line = lineincols(record, colsize)
        print(line)
    return None
562 563
def pl(listoflists):
    """
    Prints a list of lists, 1 list (row) at a time.  A row whose last item
    is a newline character is written without an additional line break;
    every other row (including an empty one) is followed by a newline.

    Usage:   pl(listoflists)
    Returns: None
    """
    import sys
    write = sys.stdout.write
    for row in listoflists:
        # len() guard: an empty row has no last item to inspect
        if len(row) > 0 and row[-1] == '\n':
            write(str(row))
        else:
            write(str(row) + '\n')
    return None
577 578
def printl(listoflists):
    """Alias for pl: hands listoflists to pl() for printing; returns None."""
    pl(listoflists)
    return None
583 584
def replace(inlst, oldval, newval):
    """
    Replaces all occurrences of 'oldval' with 'newval', recursively through
    nested lists and tuples.  The input is not modified; a tuple comes back
    as a tuple, anything else as a list.

    Usage:   replace (inlst,oldval,newval)
    Returns: a copy of inlst with the replacements made
    """
    result = []
    for item in inlst:
        if type(item) in (list, tuple):
            result.append(replace(item, oldval, newval))
        elif item == oldval:
            result.append(newval)
        else:
            result.append(item)
    # Tuples cannot be edited in place, so rebuild them from the new items.
    if type(inlst) == tuple:
        return tuple(result)
    return result
598 599
def recode(inlist, listmap, cols=None):
    """
    Changes the values in a list of lists to a new set of values (useful
    when you need to recode data from (e.g.) strings to numbers).  listmap
    is a 2D list of [oldvalue, newvalue] pairs; values that do not appear as
    an oldvalue are left alone.  cols defaults to None (meaning every column
    of every row is recoded).

    Usage:   recode (inlist,listmap,cols=None)  cols=recode cols, listmap=2D list
    Returns: a deep copy of inlist with the appropriate values replaced
    """
    lst = copy.deepcopy(inlist)
    oldvals = [pair[0] for pair in listmap]  # built once, not once per cell
    if cols is not None and type(cols) not in (list, tuple):
        cols = [cols]
    for row in lst:
        if cols is None:
            targets = range(len(row))        # all columns of THIS row
        else:
            targets = cols
        for col in targets:
            try:
                idx = oldvals.index(row[col])
            except ValueError:               # value has no mapping
                continue
            row[col] = listmap[idx][1]
    return lst
629 630
def remap(listoflists, criterion):
    """
    Remaps every row of a 2D list (listoflists).  criterion is a string
    holding an expression in terms of 'x' (one row); the function returns
    the result of evaluating that expression for each row, in order.  The
    criterion text is turned into code with eval().

    Usage:   remap(listoflists,criterion)    criterion=string
    Returns: remapped version of listoflists
    """
    transform = eval('lambda x: ' + criterion)
    return [transform(row) for row in listoflists]
643 644
def roundlist(inlist, digits):
    """
    Goes through each element in a 1D or 2D inlist, and applies the
    following function to all elements of type float ...
    round(element,digits).  A list whose first element is an int or float is
    taken to be 1D and is treated as a single row.  The input is left
    untouched.

    Usage:   roundlist(inlist,digits)
    Returns: a new list of rows (lists) with the floats rounded; note that a
             1D input therefore comes back wrapped as a one-row 2D list
    """
    if len(inlist) == 0:
        return []
    if type(inlist[0]) in (int, float):
        inlist = [inlist]
    rounded = []
    for row in inlist:
        # build fresh rows so the caller's data is never modified
        new_row = []
        for value in row:
            if type(value) == float:
                value = round(value, digits)
            new_row.append(value)
        rounded.append(new_row)
    return rounded
661 662
def sortby(listoflists, sortcols):
    """
    Sorts a list of lists on the column(s) specified in sortcols (a column
    number or a sequence of them).  The sort columns are abutted in front of
    each row, the combined rows are sorted, and the leading copies are then
    stripped off again.

    Usage:   sortby(listoflists,sortcols)
    Returns: sorted list, unchanged column ordering
    """
    keyed = abut(colex(listoflists, sortcols), listoflists)
    keyed.sort()
    try:
        nkeys = len(sortcols)
    except TypeError:                        # a single column number
        nkeys = 1
    return colex(keyed, '[' + str(nkeys) + ':]')
680 681
def unique(inlist):
    """
    Returns all unique items in the passed list, in order of first
    appearance.  If a list-of-lists is passed, unique LISTS are found (i.e.,
    items in the first dimension are compared).

    Usage:   unique (inlist)
    Returns: the unique elements (or rows) in inlist
    """
    seen = []
    for candidate in inlist:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
696
def duplicates(inlist):
    """
    Returns duplicate items in the FIRST dimension of the passed list: each
    item is reported once for every time it occurs again later in the list.

    Usage:   duplicates (inlist)
    """
    return [item for position, item in enumerate(inlist)
            if item in inlist[position + 1:]]
708 709
def nonrepeats(inlist):
    """
    Returns items that are NOT duplicated in the first dim of the passed
    list, i.e. those that occur exactly once.

    Usage:   nonrepeats (inlist)
    """
    return [item for item in inlist if inlist.count(item) == 1]
721 722 723 #=================== PSTAT ARRAY FUNCTIONS ===================== 724 #=================== PSTAT ARRAY FUNCTIONS ===================== 725 #=================== PSTAT ARRAY FUNCTIONS ===================== 726 #=================== PSTAT ARRAY FUNCTIONS ===================== 727 #=================== PSTAT ARRAY FUNCTIONS ===================== 728 #=================== PSTAT ARRAY FUNCTIONS ===================== 729 #=================== PSTAT ARRAY FUNCTIONS ===================== 730 #=================== PSTAT ARRAY FUNCTIONS ===================== 731 #=================== PSTAT ARRAY FUNCTIONS ===================== 732 #=================== PSTAT ARRAY FUNCTIONS ===================== 733 #=================== PSTAT ARRAY FUNCTIONS ===================== 734 #=================== PSTAT ARRAY FUNCTIONS ===================== 735 #=================== PSTAT ARRAY FUNCTIONS ===================== 736 #=================== PSTAT ARRAY FUNCTIONS ===================== 737 #=================== PSTAT ARRAY FUNCTIONS ===================== 738 #=================== PSTAT ARRAY FUNCTIONS ===================== 739 740 try: # DEFINE THESE *ONLY* IF numpy IS AVAILABLE 741 import numpy as N 742
def aabut(source, *args):
    """
    Like the |Stat abut command.  It concatenates arrays column-wise and
    returns the result.  A 1D array is treated as a single column.  CAUTION:
    If one array is shorter, its rows are repeated until it is as long as the
    other.

    Usage:   aabut (source, args)    where args=any # of arrays
    Returns: an array as long as the LONGEST array passed, source appearing
             on the 'left', arrays in <args> attached on the 'right'.
    """
    source = N.asarray(source)
    if len(source.shape) == 1:
        source = N.resize(source, [source.shape[0], 1])
    for addon in args:
        addon = N.asarray(addon)
        if len(addon.shape) == 1:
            # Make it a column of its OWN length first; sizing it to source
            # here would silently truncate a longer addon.
            addon = N.resize(addon, [addon.shape[0], 1])
        if len(addon) < len(source):
            addon = N.resize(addon, [source.shape[0], addon.shape[1]])
        elif len(source) < len(addon):
            source = N.resize(source, [addon.shape[0], source.shape[1]])
        source = N.concatenate((source, addon), 1)
    return source
770 771
def acolex(a, indices, axis=1):
    """
    Extracts specified indices (a single index or a list of them) from the
    passed array, along the passed axis (column extraction is default).
    BEWARE: A 1D array is presumed to be a column-array, and the whole array
    is returned as a column regardless of indices.

    Usage:   acolex (a,indices,axis=1)
    Returns: the columns of a specified by indices
    """
    if type(indices) not in (list, tuple, N.ndarray):
        indices = [indices]
    if len(N.shape(a)) != 1:
        return N.take(a, indices, axis)
    return N.resize(a, [a.shape[0], 1])
788 789
def acollapse(a, keepcols, collapsecols, fcn1=None, fcn2=None, cfcn=None):
    """
    Averages data in collapsecols, keeping all unique items in keepcols
    (using unique, which keeps unique LISTS of column values).  cfcn is the
    collapse function applied to each column (defaults to the arithmetic
    mean).  Setting fcn1 and/or fcn2 to a function appends that function's
    result after each collapsed value; 'N/A' is used when the function
    raises.

    Usage:   acollapse (a,keepcols,collapsecols,fcn1=None,fcn2=None,cfcn=None)
    Returns: with keepcols empty, an array with one collapsed value per
             collapse column (abutted with the fcn1/fcn2 results if given);
             otherwise an array with one row per unique 'condition' found in
             the keepcols columns, followed by the collapsed value(s) of the
             column(s) specified by collapsecols
    """
    def acollmean(inarray):
        values = N.ravel(inarray)
        return N.sum(values) / float(len(values))

    def best_effort(fcn, column, fallback):
        # Extra statistics are optional; a failure must not abort the run.
        try:
            return fcn(column)
        except Exception:
            return fallback

    sequence_types = (list, tuple, N.ndarray)
    if type(keepcols) not in sequence_types:
        keepcols = [keepcols]
    if type(collapsecols) not in sequence_types:
        collapsecols = [collapsecols]
    if cfcn is None:
        cfcn = acollmean

    if len(keepcols) == 0:                   # len(): safe for arrays too
        avgcol = acolex(a, collapsecols)
        means = N.array([cfcn(avgcol[:, i]) for i in range(avgcol.shape[1])])
        for fcn in (fcn1, fcn2):
            if fcn is not None:
                test = best_effort(fcn, avgcol, N.array(['N/A'] * len(means)))
                means = aabut(means, test)
        return means

    values = colex(a, keepcols)              # list version, so rows are lists
    uniques = unique(values)                 # a LIST, so .sort keeps rows intact
    uniques.sort()
    newlist = []
    for item in uniques:
        if type(item) not in sequence_types:
            item = [item]
        row = list(item)
        tmprows = alinexand(a, keepcols, row)
        for col in collapsecols:
            avgcol = acolex(tmprows, col)
            row.append(cfcn(avgcol))
            for fcn in (fcn1, fcn2):
                if fcn is not None:
                    row.append(best_effort(fcn, avgcol, 'N/A'))
        newlist.append(row)
    try:
        new_a = N.array(newlist)
    except (TypeError, ValueError):          # mixed content: object array
        new_a = N.array(newlist, 'O')
    return new_a
def adm(a, criterion):
    """
    Returns rows from the passed array that meet the criteria in the passed
    criterion expression (a string as a function of x, where x is one row).

    Usage:   adm (a,criterion)   where criterion is like 'x[2]==37'
    Returns: an array of the matching rows (an object array if the rows
             cannot be combined into a regular array)
    """
    # NOTE(review): criterion is turned into code with eval(); it must never
    # come from untrusted input.
    keep = eval('lambda x: ' + criterion)
    rows = [row for row in a if keep(row)]
    try:
        return N.array(rows)
    except (TypeError, ValueError):
        return N.array(rows, dtype='O')
876 877
def isstring(x):
    """
    Returns 1 if x is exactly of type str, 0 otherwise.

    Usage:   isstring(x)
    """
    if type(x) != str:
        return 0
    return 1
883 884
def alinexand(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val
    (from valuelist) for every (column, value) pair.  One value is required
    for each column in columnlist.  Values are compared directly with ==, so
    strings containing quotes and floats are matched exactly.

    Usage:   alinexand (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ALL i
    """
    sequence_types = (list, tuple, N.ndarray)
    if type(columnlist) not in sequence_types:
        columnlist = [columnlist]
    if type(valuelist) not in sequence_types:
        valuelist = [valuelist]
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = []
    for row in a:
        for col, val in pairs:
            if not row[col] == val:
                break
        else:                                # no pair failed
            rows.append(row)
    try:
        return N.array(rows)
    except (TypeError, ValueError):
        return N.array(rows, dtype='O')
906 907
def alinexor(a, columnlist, valuelist):
    """
    Returns the rows of an array where col (from columnlist) = val (from
    valuelist) for at least one (column, value) pair.  One value is required
    for each column in columnlist.  The exception is if either columnlist or
    valuelist has only 1 value, in which case that item will be expanded to
    match the length of the other list.  Values are compared directly
    with ==.

    Usage:   alinexor (a,columnlist,valuelist)
    Returns: the rows of a where columnlist[i]=valuelist[i] for ANY i
    """
    sequence_types = (list, tuple, N.ndarray)
    if type(columnlist) not in sequence_types:
        columnlist = [columnlist]
    if type(valuelist) not in sequence_types:
        valuelist = [valuelist]
    if len(columnlist) == 1 and len(valuelist) > 1:
        columnlist = list(columnlist) * len(valuelist)
    elif len(valuelist) == 1 and len(columnlist) > 1:
        valuelist = list(valuelist) * len(columnlist)
    pairs = [(columnlist[i], valuelist[i]) for i in range(len(columnlist))]
    rows = []
    for row in a:
        for col, val in pairs:
            if row[col] == val:              # first hit is enough
                rows.append(row)
                break
    try:
        return N.array(rows)
    except (TypeError, ValueError):
        return N.array(rows, dtype='O')
936 937
def areplace(a, oldval, newval):
    """
    Builds a version of array a in which every element equal to oldval
    has been swapped for newval; all other elements are carried over
    unchanged.

    Usage:   areplace(a,oldval,newval)
    Returns: the array produced by N.where (a itself is not modified)
    """
    matches = (a == oldval)
    return N.where(matches, newval, a)
945 946
def arecode(a, listmap, col='all'):
    """
    Remaps the values in an array to a new set of values (useful when
    you need to recode data from (e.g.) strings to numbers as most stats
    packages require).  Can work on a SINGLE column, or 'all' columns at
    once.  Every (old, new) pair in listmap is applied; each element is
    recoded at most once, by the first pair whose old value it equals, so
    a map such as [(1,2),(2,3)] does not chain 1 -> 2 -> 3.

    Usage:   arecode (a,listmap,col='all')
    Returns: a version of array a where listmap[i][0] = (instead) listmap[i][1].
             The result has dtype object when a is an object array or when
             any replacement value is a string.
    """
    recode_all = (col == 'all')
    ashape = a.shape
    if recode_all:
        work = a.ravel()
    else:
        work = acolex(a, col).ravel()

    # Decide once, up front, whether object storage is needed, so that
    # all pairs are handled the same way.
    needs_object = work.dtype.char == 'O' or a.dtype.char == 'O'
    for pair in listmap:
        if isinstance(pair[1], str):
            needs_object = True

    if needs_object:
        a = N.array(a, dtype='O')
        recoded = N.array(work, dtype='O')      # copy; input stays untouched
        for i, item in enumerate(work):
            for pair in listmap:
                if item == pair[0]:
                    recoded[i] = pair[1]
                    break
    else:
        recoded = N.array(work)                 # copy; input stays untouched
        pending = N.ones(work.shape, dtype=bool)    # elements not yet recoded
        for pair in listmap:
            # Always match against the ORIGINAL values, never against
            # values written by an earlier pair.
            hits = N.logical_and(pending, work == pair[0])
            recoded = N.where(hits, pair[1], recoded)
            pending = N.logical_and(pending, N.logical_not(hits))

    if recode_all:
        return N.reshape(recoded, ashape)
    # Splice the recoded column back between its neighbours.
    return N.concatenate([a[:, 0:col], recoded[:, N.newaxis], a[:, col+1:]], 1)
977 978
def arowcompare(row1, row2):
    """
    Compares two rows from an array element by element, regardless of
    whether it is an array of numbers or of python objects.

    Usage:   arowcompare(row1,row2)
    Returns: a boolean array, True where the two rows had identical
             elements and False otherwise
    """
    row1 = N.asarray(row1)
    row2 = N.asarray(row2)
    if row1.dtype.char == 'O' or row2.dtype.char == 'O':
        # Object arrays: compare pair by pair using each element's own ==
        # (replaces the Python-2-only cmp() based comparison).
        return N.array([bool(x == y) for x, y in zip(row1, row2)], dtype=bool)
    return N.equal(row1, row2)
995 996
def arowsame(row1, row2):
    """
    Compares two rows from an array, regardless of whether it is an
    array of numbers or of python objects.  The element-wise comparison
    is delegated to arowcompare(); this function only reduces its result.

    Usage:   arowsame(row1,row2)
    Returns: a true value if every element-wise comparison reported by
             arowcompare is true, a false value otherwise.
    """
    # N.all is used in place of N.alltrue, which NumPy 2.0 removed.
    return N.all(arowcompare(row1, row2))
1007 1008
def asortrows(a, axis=0):
    """
    Sorts an array "by rows".  This differs from N.sort(), which sorts the
    elements WITHIN the given axis independently.  Instead, every slice
    taken along the given axis (a row, for axis=0) is kept intact and the
    slices are shifted 'up or down' relative to one another.  Slices are
    ordered by their first element, ties are broken by the following
    elements, and fully equal slices keep their original order.

    Arrays with fewer than 2 dimensions are simply sorted with a stable
    mergesort.

    Usage:   asortrows(a,axis=0)
    Returns: sorted version of a (a itself is not modified)
    """
    a = N.asarray(a)
    if a.ndim < 2:
        return N.sort(a, axis=axis, kind='mergesort')
    # One row of sort keys per slice along `axis`.
    keys = N.swapaxes(a, 0, axis).reshape(a.shape[axis], -1)
    if keys.dtype.char == 'O':
        # Python objects: order with a stable Python sort on row tuples.
        order = sorted(range(len(keys)), key=lambda i: tuple(keys[i]))
    else:
        # lexsort treats its LAST key as primary, so reverse the key
        # columns to make the first element the primary sort key.
        order = N.lexsort(keys.T[::-1])
    return N.take(a, order, axis=axis)
1020 1021
def aunique(inarray):
    """
    Returns unique items in the FIRST dimension of the passed array, in
    order of first appearance.  For a 1D array the items are its elements;
    for a 2+D array they are its rows (sub-arrays), and two rows count as
    the same item only if all of their elements are equal.

    Usage:   aunique (inarray)
    Returns: an array holding the first occurrence of each distinct item;
             an empty input is returned as an (empty) array
    """
    inarray = N.asarray(inarray)
    if len(inarray) == 0:
        return inarray
    # One flat row per first-dimension item, so that 1D and N-D input can
    # share the same comparison code.
    flat = inarray.reshape(len(inarray), -1)
    is_object = inarray.dtype.char == 'O'
    kept = [0]                      # indices of the first occurrences
    for i in range(1, len(flat)):
        row = flat[i]
        if is_object:
            # Object arrays: compare element by element with Python's ==.
            seen = False
            for j in kept:
                if all(x == y for x, y in zip(row, flat[j])):
                    seen = True
                    break
        else:
            seen = (flat[kept] == row).all(axis=1).any()
        if not seen:
            kept.append(i)
    return inarray[kept]
1061 1062
def aduplicates(inarray):
    """
    Returns duplicate items in the FIRST dimension of the passed array:
    every item that occurs more than once is reported exactly once, in
    order of first appearance.  For a 2+D array the items are its rows.

    Usage:   aduplicates (inarray)
    Returns: an array of the duplicated items (empty if there are none)
    """
    items = N.array(inarray).tolist()   # nested lists compare with plain ==
    dups = []
    for i, item in enumerate(items):
        # Duplicated if it shows up again later; report each item only once.
        if item in items[i+1:] and item not in dups:
            dups.append(item)
    return N.array(dups)
1087 1088 except ImportError: # IF NUMERIC ISN'T AVAILABLE, SKIP ALL arrayfuncs 1089 pass 1090