1 __VERSION__="ete2-2.0rev90"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 import sys
27 import re
28 import math
29 from os import path
30
31 import numpy
32 from ete2.parser.text_arraytable import write_arraytable, read_arraytable
33
34 __all__ = ["ArrayTable"]
35
37 """This object is designed to work with matrix datasets (such as
38 microarrays). It loads the matrix and gives easy access to its row
39 and column vectors. """
40
# NOTE(review): the `def` line of this method is not present in this listing.
# The body returns the string form of the underlying matrix.
42 return str(self.matrix)
43
def __init__(self, matrix_file=None, mtype="float"):
    """Create an empty table and optionally load a matrix file into it.

    All name lists, name-to-vector dictionaries, the matrix and the
    matrix type start out empty. When 'matrix_file' is given, it is
    handed to read_arraytable together with 'mtype' and this object.
    """
    # Empty state: no names, no vectors, no matrix.
    self.colNames, self.rowNames = [], []
    self.colValues, self.rowValues = {}, {}
    self.matrix = None
    self.mtype = None

    if matrix_file is None:
        return

    read_arraytable(matrix_file, mtype=mtype, arraytable_object=self)
57
59 """ Returns the vector stored under the given row name, or None if the name is unknown """
60 return self.rowValues.get(rowname,None)
61
62
64 """ Returns the vector stored under the given column name, or None if the name is unknown """
65 return self.colValues.get(colname,None)
66
67
69 """ Returns a numpy array with the vectors of several column names, in the order requested. Raises KeyError if a name is unknown """
70 vectors = [self.colValues[cname] for cname in colnames]
71 return numpy.array(vectors)
72
74 """ Returns a numpy array with the vectors of several row names, in the order requested. Raises KeyError if a name is unknown """
75 vectors = [self.rowValues[rname] for rname in rownames]
76 return numpy.array(vectors)
77
79 """Removes the given column from the current dataset. Nothing happens if the column name is unknown. """
80 col_value = self.colValues.pop(colname, None)
# NOTE(review): col_value is a numpy slice when the column exists, and
# `!= None` on an array is an element-wise comparison in current numpy
# releases, so this truth test can raise ValueError. `is not None` looks
# like the intent -- confirm.
81 if col_value != None:
# NOTE(review): relies on range() returning a list (Python 2); the .pop()
# below fails on a Python 3 range object.
82 new_indexes = range(len(self.colNames))
83 index = self.colNames.index(colname)
84 self.colNames.pop(index)
85 new_indexes.pop(index)
# Select the surviving columns by indexing the axis-swapped matrix, then
# swap the axes back before re-linking names to the new matrix.
86 newmatrix = self.matrix.swapaxes(0,1)
87 newmatrix = newmatrix[new_indexes].swapaxes(0,1)
88 self._link_names2matrix(newmatrix)
89
def merge_columns(self, groups, grouping_criterion):
    """ Returns a new ArrayTable object in which columns are
    merged according to a given criterion.

    'groups' argument must be a dictionary in which keys are the
    new column names, and each value is the list of current
    column names to be merged.

    'grouping_criterion' must be 'min', 'max' or 'mean', and
    defines how numeric values will be merged.

    Merged columns come first, in the iteration order of 'groups',
    followed by every column that is not part of any group, in its
    current order. Row names are copied, so the new table does not
    share its row name list with this one.

    Raises ValueError if the criterion is not supported, if a group
    is empty, if a listed column does not exist, or if a column is
    listed more than once.

    Example:
      my_groups = {'NewColumn':['column5', 'column6']}
      new_Array = Array.merge_columns(my_groups, 'max')

    """

    if grouping_criterion == "max":
        grouping_f = get_max_vector
    elif grouping_criterion == "min":
        grouping_f = get_min_vector
    elif grouping_criterion == "mean":
        grouping_f = get_mean_vector
    else:
        raise ValueError("grouping_criterion not supported. Use max|min|mean ")

    grouped_array = self.__class__()
    grouped_matrix = []
    colNames = []
    alltnames = set()
    # items() and call-style raise work on both Python 2 and Python 3.
    for gname, tnames in groups.items():
        if not tnames:
            # An empty group has nothing to reduce; fail with a clear
            # message instead of an obscure numpy reduction error.
            raise ValueError(str(gname)+" group has no columns to merge")
        all_vectors = []
        for tn in tnames:
            if tn not in self.colValues:
                raise ValueError(str(tn)+" column not found.")
            if tn in alltnames:
                raise ValueError(str(tn)+" duplicated column name for merging")
            alltnames.add(tn)
            vector = self.get_column_vector(tn).astype(float)
            all_vectors.append(vector)

        grouped_matrix.append(grouping_f(all_vectors))
        colNames.append(gname)

    # Columns that were not merged are carried over untouched.
    for cname in self.colNames:
        if cname not in alltnames:
            grouped_matrix.append(self.get_column_vector(cname))
            colNames.append(cname)

    # Copy the row names so later edits to one table cannot leak into
    # the other.
    grouped_array.rowNames = list(self.rowNames)
    grouped_array.colNames = colNames
    # Vectors were collected column by column; transpose to rows x columns.
    vmatrix = numpy.array(grouped_matrix).transpose()
    grouped_array._link_names2matrix(vmatrix)
    return grouped_array
145
147 """ Returns a new ArrayTable in which current matrix is transposed. Row names become column names and vice versa. """
148
# A new instance of the same class is created without arguments.
149 transposedA = self.__class__()
150 transposedM = self.matrix.transpose()
# Name lists are copied, so the new table does not share them with this one.
151 transposedA.colNames = list(self.rowNames)
152 transposedA.rowNames = list(self.colNames)
153 transposedA._link_names2matrix(transposedM)
154
155
156
157
158
159
160 return transposedA
161
163 """ Synchronize curent column and row names to the given matrix"""
164 if len(self.rowNames) != m.shape[0]:
165 raise ValueError , "Expecting matrix with %d rows" % m.size[0]
166
167 if len(self.colNames) != m.shape[1]:
168 raise ValueError , "Expecting matrix with %d columns" % m.size[1]
169
170 self.matrix = m
171 self.colValues.clear()
172 self.rowValues.clear()
173
174 i = 0
175 for colname in self.colNames:
176 self.colValues[colname] = self.matrix[:,i]
177 i+=1
178
179 i = 0
180 for rowname in self.rowNames:
181 self.rowValues[rowname] = self.matrix[i,:]
182 i+=1
183
# NOTE(review): the body of this method is not present in this listing.
# `colnames=[]` is a mutable default shared across calls -- confirm the
# body never mutates it.
184 - def write(self, fname, colnames=[]):
186
187
188
# NOTE(review): the `def` line of this function is not present in this listing.
# Sums fdist(v, vcenter) over every vector in vlist and returns twice the
# mean of those distances. An empty vlist raises ZeroDivisionError.
190 d = 0.0
191 for v in vlist:
192 d += fdist(v,vcenter)
193 return 2*(d / len(vlist))
194
# NOTE(review): the `def` line of this function is not present in this listing.
# Adds the fdist distance from each vector of vlist1 to vcenter2 and from
# each vector of vlist2 to vcenter1, then divides the total by the combined
# number of vectors. Two empty lists raise ZeroDivisionError.
196 d1,d2 = 0.0, 0.0
197 for v in vlist1:
198 d1 += fdist(v,vcenter2)
199 for v in vlist2:
200 d2 += fdist(v,vcenter1)
201 return (d1+d2) / (len(vlist1)+len(vlist2))
202
204 """ Returns the mean and the standard deviation of the given values, discarding non finite values """
205 valid_values = []
206 for v in values:
207 if numpy.isfinite(v):
208 valid_values.append(v)
# The result is a (mean, std) pair computed over the finite values only.
209 return numpy.mean(valid_values), numpy.std(valid_values)
210
212 """ Returns the mean profile and the standard deviation profile of the given vectors, discarding non finite values at each position """
213
# A single vector is returned as is (its non finite values are not
# filtered), paired with an all-zero deviation vector.
214 if len(vectors)==1:
215 return vectors[0], numpy.zeros(len(vectors[0]))
216
# The first vector sets the number of positions that are processed.
217 length = len(vectors[0])
218
219 safe_mean = []
220 safe_std = []
221
# NOTE(review): xrange exists only in Python 2.
222 for pos in xrange(length):
223 pos_mean = []
224 for v in vectors:
225 if numpy.isfinite(v[pos]):
226 pos_mean.append(v[pos])
227 safe_mean.append(numpy.mean(pos_mean))
228 safe_std.append(numpy.std(pos_mean))
# Both results are plain Python lists, one entry per position.
229 return safe_mean, safe_std
230
# NOTE(review): the `def` line of this function is not present in this listing.
# Stacks the vectors of vlist into one array and returns their mean along
# axis 0.
232 a = numpy.array(vlist)
233 return numpy.mean(a,0)
234
238
# NOTE(review): the `def` line of this function is not present in this listing.
# Stacks the vectors of vlist into one array and returns their maximum
# along axis 0.
240 a = numpy.array(vlist)
241 return numpy.max(a,0)
242
# NOTE(review): the `def` line of this function is not present in this listing.
# Stacks the vectors of vlist into one array and returns their minimum
# along axis 0.
244 a = numpy.array(vlist)
245 return numpy.min(a,0)
246