# Version tag string for this module; presumably stamped by the release
# tooling rather than edited by hand -- TODO confirm.
__VERSION__="ete2-2.0rev86"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 import re
28 from sys import stderr
29 import numpy
30
31
# Names exported by ``from <this module> import *``.
__all__ = ['read_arraytable', 'write_arraytable']
33
def _unique_name(name, seen_counts, taken):
    """Return ``(label, renamed)`` where *label* is not already in *taken*.

    *seen_counts* maps each raw name to how many times it has been read so
    far and is updated in place. A clashing name gets ``_<count>`` appended;
    the count keeps growing until the resulting label is unused, so a
    literal ``a_2`` column can never collide with a renamed second ``a``.
    """
    seen_counts[name] = seen_counts.get(name, 0) + 1
    if name not in taken:
        return name, False
    label = "%s_%d" % (name, seen_counts[name])
    while label in taken:
        seen_counts[name] += 1
        label = "%s_%d" % (name, seen_counts[name])
    return label, True


def read_arraytable(matrix_file, mtype="float", arraytable_object=None):
    """Read a tab-delimited text matrix into an array table.

    Expected layout: one ``#NAMES<TAB>col1<TAB>col2...`` header line (matched
    case-insensitively), followed by one ``rowname<TAB>v1<TAB>v2...`` line
    per row. Empty lines and any other line starting with ``#`` are skipped.
    Empty cells are stored as NaN. Duplicated row or column names are made
    unique by appending ``_<n>``, and a notice is written to stderr.

    Parameters:
        matrix_file: either a path to the file to read or, when the string
            contains a newline, the matrix text itself.
        mtype: dtype handed to ``numpy.ndarray.astype`` for the value matrix;
            also stored on the table as ``mtype``.
        arraytable_object: table to fill. When ``None`` a new
            ``ete2.coretype.arraytable.ArrayTable`` is created.

    Returns:
        The filled table (the same object as *arraytable_object* if given).

    Raises:
        ValueError: a data line appears before the ``#NAMES`` header, or a
            data line does not have exactly one value per column.
    """
    # NOTE(review): the ``def`` line is missing from the listing this block
    # was reviewed from. The signature above is reconstructed from the names
    # the body reads -- confirm parameter order and the ``mtype`` default
    # against existing callers.
    if arraytable_object is None:
        # Imported only when needed, as in the original code (presumably to
        # avoid an import cycle -- TODO confirm).
        from ete2.coretype import arraytable
        A = arraytable.ArrayTable()
    else:
        A = arraytable_object

    A.mtype = mtype

    # A string holding a newline is taken as the matrix content itself;
    # anything else is treated as a path. The file is read inside ``with``
    # so the handle is always closed (it used to be left open).
    if "\n" in matrix_file:
        lines = matrix_file.split("\n")
    else:
        with open(matrix_file) as handle:
            lines = handle.readlines()

    rows = []
    rowname_counter = {}
    colname_counter = {}
    row_dup_flag = False
    col_dup_flag = False

    for line in lines:
        # Drop the line terminator; "\r" is included so CRLF files do not
        # leave a stray carriage return on the last field.
        line = line.rstrip("\r\n")
        if not line:
            continue

        fields = line.split("\t")

        if line[0] == '#':
            if not re.match("#NAMES", fields[0], re.IGNORECASE):
                continue  # ordinary comment line
            for colname in fields[1:]:
                colname, renamed = _unique_name(
                    colname.strip(), colname_counter, A.colValues)
                col_dup_flag = col_dup_flag or renamed
                A.colValues[colname] = None
                A.colNames.append(colname)
            if col_dup_flag:
                stderr.write("Duplicated column names were renamed.\n")
            continue

        # Data lines are only meaningful once the header has been read.
        if not A.colNames:
            raise ValueError("Column names are required.")
        if len(fields) - 1 != len(A.colNames):
            raise ValueError(
                "Invalid number of columns. Expecting:%d" % len(A.colNames))

        rowname, renamed = _unique_name(
            fields[0].strip(), rowname_counter, A.rowValues)
        # The original assigned a misspelled name here (``row_dup_names``),
        # so the warning below could never be printed.
        row_dup_flag = row_dup_flag or renamed
        A.rowValues[rowname] = None
        A.rowNames.append(rowname)

        # Blank cells become NaN; the rest stay text until the cast below.
        rows.append([numpy.nan if cell.strip() == "" else cell
                     for cell in fields[1:]])

    if row_dup_flag:
        stderr.write("Duplicated row names were renamed.\n")

    # Cast everything that was read to the requested dtype in one go.
    vmatrix = numpy.array(rows).astype(A.mtype)

    # Hand the matrix to the table so it can associate names with vectors.
    A._link_names2matrix(vmatrix)
    return A
125
def write_arraytable(A, fname, colnames=None):
    """Write an array table to *fname* as tab-delimited text.

    The first line is ``#NAMES`` followed by the column names; each further
    line is a row name followed by that row's values for those columns.

    Parameters:
        A: table providing ``colNames``, ``rowNames`` and
            ``get_several_column_vectors(colnames)``.
        fname: path of the file to create or overwrite.
        colnames: columns to write, in output order. ``None`` or an empty
            sequence means every column in ``A.colNames``.
    """
    # NOTE(review): the ``def`` line is missing from the listing this block
    # was reviewed from. The body compared ``colnames`` with ``[]``, which
    # suggests a ``colnames=[]`` default; ``None`` is used here to avoid a
    # mutable default while still accepting ``[]`` -- confirm against callers.
    if not colnames:
        colnames = A.colNames

    # Fetch the data before opening the file so a bad column name does not
    # leave a truncated file behind. The vectors come back one per column;
    # swapping axes gives one entry per row.
    matrix = A.get_several_column_vectors(colnames).swapaxes(0, 1)

    # ``with`` closes the file even if a write or a row lookup raises.
    with open(fname, "w") as out:
        out.write('\t'.join(["#NAMES"] + list(colnames)) + "\n")
        for idx, rname in enumerate(A.rowNames):
            cells = [rname] + matrix[idx].tolist()
            out.write('\t'.join(map(str, cells)) + "\n")
138