Package ete2 :: Package parser :: Module text_arraytable
[hide private]
[frames | no frames]

Source Code for Module ete2.parser.text_arraytable

  1  __VERSION__="ete2-2.0rev104"  # version/revision tag string carried by this module
  2  # #START_LICENSE########################################################### 
  3  # 
  4  # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.   
  5  # email: jhcepas@gmail.com 
  6  # 
  7  # This file is part of the Environment for Tree Exploration program (ETE).  
  8  # http://ete.cgenomics.org 
  9  #   
 10  # ETE is free software: you can redistribute it and/or modify it 
 11  # under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation, either version 3 of the License, or 
 13  # (at your option) any later version. 
 14  #   
 15  # ETE is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  #   
 20  # You should have received a copy of the GNU General Public License 
 21  # along with ETE.  If not, see <http://www.gnu.org/licenses/>. 
 22  # 
 23  # #END_LICENSE############################################################# 
 24   
 25  #! /usr/bin/env python 
 26   
 27  import re 
 28  from sys import stderr 
 29  import numpy 
 30   
 31   
 32  __all__ = ['read_arraytable', 'write_arraytable'] # names exported by "from <module> import *"
 33   
def _unique_name(name, seen_counts, taken):
    """Return ``(name, renamed)`` with *name* made unique against *taken*.

    *seen_counts* records how many times each raw name has been read so
    far.  When the raw name is already a key of *taken*, the returned name
    is ``"<name>_<count>"`` and *renamed* is True.
    """
    seen_counts[name] = seen_counts.get(name, 0) + 1
    if name in taken:
        return "%s_%d" % (name, seen_counts[name]), True
    return name, False


def read_arraytable(matrix_file, mtype="float", arraytable_object = None):
    """ Reads a text tab-delimited matrix from file

    Expected layout, one record per line, fields separated by tabs:

      * a header line whose first field starts with ``#NAMES`` (case
        insensitive); the remaining fields are the column names;
      * any other line starting with ``#`` is a comment and is skipped;
      * every other non-empty line is a row: row name first, then one
        value per column.  Blank values are stored as NaN.

    Duplicated column or row names are renamed by appending ``_<n>`` and a
    warning is written to stderr.

    :param matrix_file: path of the file to read, or the matrix text
        itself (any string containing a newline is treated as the text).
    :param mtype: dtype passed to ``numpy.ndarray.astype`` for the values.
    :param arraytable_object: object to fill; when None a new
        ``ete2.coretype.arraytable.ArrayTable`` is created.
    :returns: the filled table object.
    :raises ValueError: if a row appears before the ``#NAMES`` header, or
        its number of values differs from the number of column names.
    """
    if arraytable_object is None:
        from ete2.coretype import arraytable
        A = arraytable.ArrayTable()
    else:
        A = arraytable_object

    A.mtype = mtype
    temp_matrix = []
    rowname_counter = {}
    colname_counter = {}
    row_dup_flag = False
    col_dup_flag = False

    # If matrix_file spans several lines it is the matrix text itself;
    # otherwise it is taken to be a path.
    if "\n" in matrix_file:
        opened_file = None
        matrix_data = matrix_file.split("\n")
    else:
        opened_file = matrix_data = open(matrix_file)

    try:
        for line in matrix_data:
            # Drop the line terminator (also the "\r" of CRLF files) but
            # keep tabs, so trailing empty fields are preserved.
            line = line.rstrip("\r\n")
            if not line:
                continue
            fields = line.split("\t")

            if line[0] == '#':
                if not re.match("#NAMES", fields[0], re.IGNORECASE):
                    # Plain comment line
                    continue
                # Column names header
                for colname in fields[1:]:
                    colname, renamed = _unique_name(
                        colname.strip(), colname_counter, A.colValues)
                    col_dup_flag = col_dup_flag or renamed
                    A.colValues[colname] = None
                    A.colNames.append(colname)
                if col_dup_flag:
                    stderr.write("Duplicated column names were renamed.\n")

            # Values are only accepted once column names are loaded
            elif A.colNames:
                if len(fields) - 1 != len(A.colNames):
                    raise ValueError(
                        "Invalid number of columns. Expecting:%d"
                        % len(A.colNames))

                rowname, renamed = _unique_name(
                    fields.pop(0).strip(), rowname_counter, A.rowValues)
                row_dup_flag = row_dup_flag or renamed
                A.rowValues[rowname] = None
                A.rowNames.append(rowname)

                # Blank cells become NaN; other cells stay as text and are
                # converted by astype() below.
                values = []
                for f in fields:
                    if f.strip() == "":
                        f = numpy.nan
                    values.append(f)
                temp_matrix.append(values)
            else:
                raise ValueError("Column names are required.")
    finally:
        # Only close what this function opened.
        if opened_file is not None:
            opened_file.close()

    if row_dup_flag:
        stderr.write("Duplicated row names were renamed.\n")

    # Convert all read lines into a numpy matrix of the requested type
    vmatrix = numpy.array(temp_matrix).astype(A.mtype)

    # Let the table object link its names to the vectors in the matrix
    A._link_names2matrix(vmatrix)
    return A
125
def write_arraytable(A, fname, colnames=None):
    """ Writes an array table to a text tab-delimited file

    The output starts with a ``#NAMES`` header line listing the column
    names, followed by one line per row of ``A.rowNames``: the row name
    and then its values, all separated by tabs.

    :param A: table object providing ``colNames``, ``rowNames`` and
        ``get_several_column_vectors(colnames)``.  The latter is assumed
        to return a numpy array indexed [column][row] (inferred from the
        axis swap below; confirm against the table class).
    :param fname: path of the file to write (overwritten if it exists).
    :param colnames: names of the columns to write, in order.  None or an
        empty sequence means all columns in ``A.colNames``.
    """
    if colnames is None or len(colnames) == 0:
        colnames = A.colNames

    # Swap axes so that the first index selects a row instead of a column.
    matrix = A.get_several_column_vectors(colnames).swapaxes(0, 1)

    OUT = open(fname, "w")
    try:
        OUT.write('\t'.join(["#NAMES"] + list(colnames)) + "\n")
        for index, rname in enumerate(A.rowNames):
            row = [rname] + matrix[index].tolist()
            OUT.write('\t'.join(map(str, row)) + "\n")
    finally:
        # Close the file even when a write or conversion fails.
        OUT.close()
138