1 __VERSION__="ete2-2.0rev104"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 from sys import stderr
26 import numpy
27 import clustvalidation
28 from numpy import nan as NaN
29 from ete2.coretype.tree import _translate_nodes
30 from ete2 import TreeNode, ArrayTable
31
32 __all__ = ["ClusterNode", "ClusterTree"]
33
35 """ Creates a new Cluster Tree object, which is a collection
36 of ClusterNode instances connected in a hierarchical way, and
37 representing a clustering result.
38
39 a newick file or string can be passed as the first argument. An
40 ArrayTable file or instance can be passed as a second argument.
41
42 Examples:
43 t1 = Tree() # creates an empty tree
44 t2 = Tree( '(A:1,(B:1,(C:1,D:1):0.5):0.5);' )
45 t3 = Tree( '/home/user/myNewickFile.txt' )
46 """
47
49 raise ValueError, "This attribute can not be manually set."
50
52 if self._silhouette == None:
53 self.get_silhouette()
54 return self._intracluster_dist
55
57 if self._silhouette == None:
58 self.get_silhouette()
59 return self._intercluster_dist
60
62 if self._silhouette == None:
63 self.get_silhouette()
64 return self._silhouette
65
70
75
78
79 intracluster_dist = property(fget=_get_intra, fset=_set_forbidden)
80 intercluster_dist = property(fget=_get_inter, fset=_set_forbidden)
81 silhouette = property(fget=_get_silh, fset=_set_forbidden)
82 profile = property(fget=_get_prof, fset=_set_profile)
83 deviation = property(fget=_get_std, fset=_set_forbidden)
84
87
88
89
90
91 TreeNode.__init__(self, newick)
92 self._fdist = None
93 self._silhouette = None
94 self._intercluster_dist = None
95 self._intracluster_dist = None
96 self._profile = None
97 self._std_profile = None
98
99
100 self.features.add("intercluster_dist")
101 self.features.add("intracluster_dist")
102 self.features.add("silhouette")
103 self.features.add("profile")
104 self.features.add("deviation")
105
106
107 if text_array:
108 self.link_to_arraytable(text_array)
109
110 if newick:
111 self.set_distance_function(fdist)
112
114 """ Sets the distance function used to calculate cluster
115 distances and silhouette index.
116
117 ARGUMENTS:
118
119 fn: a pointer to a python function accepting two arrays (numpy) as
120 arguments.
121
122 EXAMPLE:
123
124 # A simple euclidean distance
125 my_dist_fn = lambda x,y: abs(x-y)
126 tree.set_distance_function(my_dist_fn)
127
128 """
129 for n in self.traverse():
130 n._fdist = fn
131 n._silhouette = None
132 n._intercluster_dist = None
133 n._intracluster_dist = None
134
136 """ Allows to link a given arraytable object to the tree
137 structure under this node. Row names in the arraytable object
138 are expected to match leaf names.
139
140 Returns a list of nodes for which profiles could not be found
141 in arraytable.
142
143 """
144
145
146
147 if type(arraytbl) == ArrayTable:
148 array = arraytbl
149 else:
150 array = ArrayTable(arraytbl)
151
152 missing_leaves = []
153 matrix_values = [i for r in xrange(len(array.matrix))\
154 for i in array.matrix[r] if numpy.isfinite(i)]
155
156 array._matrix_min = min(matrix_values)
157 array._matrix_max = max(matrix_values)
158
159 for n in self.traverse():
160 n.arraytable = array
161 if n.is_leaf() and n.name in array.rowNames:
162 n._profile = array.get_row_vector(n.name)
163 elif n.is_leaf():
164 n._profile = [NaN]*len(array.colNames)
165 missing_leaves.append(n)
166
167
168 if len(missing_leaves)>0:
169 print >>stderr, \
170 """[%d] leaf names could not be mapped to the matrix rows.""" %\
171 len(missing_leaves)
172
173 self.arraytable = array
174
176 """ Returns an iterator over all the profiles associated to
177 the leaves under this node."""
178 for l in self.iter_leaves():
179 yield l.get_profile()[0]
180
182 """ Returns the list of all the profiles associated to the
183 leaves under this node."""
184 return [l.get_profile()[0] for l in self.iter_leaves()]
185
187 """ Calculates the node's silhouette value by using a given
188 distance function. By default, euclidean distance is used. It
189 also calculates the deviation profile, mean profile, and
190 inter/intra-cluster distances.
191
192 It sets the following features into the analyzed node:
193 - node.intracluster
194 - node.intercluster
195 - node.silhouette
196
197 intracluster distances a(i) are calculated as the Centroid
198 Diameter
199
200 intercluster distances b(i) are calculated as the Centroid linkage distance
201
202 ** Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the
203 interpretation and validation of cluster analysis.
204 J. Comput. Appl. Math., 20, 53-65.
205
206 """
207 if fdist is None:
208 fdist = self._fdist
209
210
211 self._silhouette, self._intracluster_dist, self._intercluster_dist = \
212 clustvalidation.get_silhouette_width(fdist, self)
213
214 return self._silhouette, self._intracluster_dist, self._intercluster_dist
215
def get_dunn(self, clusters, fdist=None):
    """ Computes the Dunn index over a set of descendant nodes.

    ARGUMENTS:

      clusters: the descendant nodes to evaluate. They are resolved
        through _translate_nodes() before being used.

      fdist: distance function to use. When None, the function
        stored in this node (self._fdist) is used instead.

    Returns the value produced by clustvalidation.get_dunn_index().
    """
    # Fall back to the node's own distance function when none is given.
    distance_fn = self._fdist if fdist is None else fdist
    targets = _translate_nodes(self, *clusters)
    return clustvalidation.get_dunn_index(distance_fn, *targets)
225
227 """ This internal function updates the mean profile
228 associated to an internal node. """
229
230
231 self._profile, self._std_profile = clustvalidation.get_avg_profile(self)
232
233
234
235 ClusterTree = ClusterNode
236