1 __VERSION__="ete2-2.0rev90"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 from sys import stderr
26 import clustvalidation
27 from numpy import nan as NaN
28 from ete2.coretype.tree import _translate_nodes
29 from ete2 import TreeNode, ArrayTable
30
31 __all__ = ["ClusterNode", "ClusterTree"]
32
34 """ Creates a new Cluster Tree object, which is a collection
35 of ClusterNode instances connected in a hierarchical way, and
36 representing a clustering result.
37
38 A newick file or string can be passed as the first argument. An
39 ArrayTable file or instance can be passed as a second argument.
40
41 Examples:
42 t1 = Tree() # creates an empty tree
43 t2 = Tree( '(A:1,(B:1,(C:1,D:1):0.5):0.5);' )
44 t3 = Tree( '/home/user/myNewickFile.txt' )
45 """
46
48 raise ValueError, "This attribute can not be manually set."
49
51 if self._silhouette == None:
52 self.get_silhouette()
53 return self._intracluster_dist
54
56 if self._silhouette == None:
57 self.get_silhouette()
58 return self._intercluster_dist
59
61 if self._silhouette == None:
62 self.get_silhouette()
63 return self._silhouette
64
69
74
77
78 intracluster_dist = property(fget=_get_intra, fset=_set_forbidden)
79 intercluster_dist = property(fget=_get_inter, fset=_set_forbidden)
80 silhouette = property(fget=_get_silh, fset=_set_forbidden)
81 profile = property(fget=_get_prof, fset=_set_profile)
82 deviation = property(fget=_get_std, fset=_set_forbidden)
83
86
87
88
89
90 TreeNode.__init__(self, newick)
91 self._fdist = None
92 self._silhouette = None
93 self._intercluster_dist = None
94 self._intracluster_dist = None
95 self._profile = None
96 self._std_profile = None
97
98
99 self.features.add("intercluster_dist")
100 self.features.add("intracluster_dist")
101 self.features.add("silhouette")
102 self.features.add("profile")
103 self.features.add("deviation")
104
105
106 if text_array:
107 self.link_to_arraytable(text_array)
108
109 if newick:
110 self.set_distance_function(fdist)
111
113 """ Sets the distance function used to calculate cluster
114 distances and silhouette indexes.
115
116 ARGUMENTS:
117
118 fn: a reference to a Python function accepting two arrays (numpy) as
119 arguments.
120
121 EXAMPLE:
122
123 # A simple euclidean distance
124 my_dist_fn = lambda x,y: abs(x-y)
125 tree.set_distance_function(my_dist_fn)
126
127 """
128 for n in self.traverse():
129 n._fdist = fn
130 self._silhouette = None
131 self._intercluster_dist = None
132 self._intracluster_dist = None
133
135 """ Allows to link a given arraytable object to the tree
136 structure under this node. Row names in the arraytable object
137 are expected to match leaf names.
138
139 Returns a list of nodes for which profiles could not be found
140 in arraytable.
141
142 """
143
144
145
146 if type(arraytbl) == ArrayTable:
147 array = arraytbl
148 else:
149 array = ArrayTable(arraytbl)
150
151 missing_leaves = []
152 for n in self.traverse():
153 n.arraytable = array
154 if n.is_leaf() and n.name in array.rowNames:
155 n._profile = array.get_row_vector(n.name)
156 elif n.is_leaf():
157 n._profile = [NaN]*len(array.colNames)
158 missing_leaves.append(n)
159
160
161 if len(missing_leaves)>0:
162 print >>stderr, \
163 """[%d] leaf names could not be mapped to the matrix rows.""" %\
164 len(missing_leaves)
165
166 self.arraytable = array
167
169 """ Returns an iterator over all the profiles associated to
170 the leaves under this node."""
171 for l in self.iter_leaves():
172 yield l.get_profile()[0]
173
175 """ Returns the list of all the profiles associated to the
176 leaves under this node."""
177 return [l.get_profile()[0] for l in self.iter_leaves()]
178
180 """ Calculates the node's silhouette value by using a given
181 distance function. By default, euclidean distance is used. It
182 also calculates the deviation profile, mean profile, and
183 inter/intra-cluster distances.
184
185 It sets the following features into the analyzed node:
186 - node.intracluster_dist
187 - node.intercluster_dist
188 - node.silhouette
189
190 intracluster distances a(i) are calculated as the Centroid
191 Diameter
192
193 intercluster distances b(i) are calculated as the Centroid linkage distance
194
195 ** Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the
196 interpretation and validation of cluster analysis.
197 J. Comput. Appl. Math., 20, 53-65.
198
199 """
200 if fdist is None:
201 fdist = self._fdist
202
203
204 self._silhouette, self._intracluster_dist, self._intercluster_dist = \
205 clustvalidation.get_silhouette_width(fdist, self)
206
207 return self._silhouette, self._intracluster_dist, self._intercluster_dist
208
def get_dunn(self, clusters, fdist=None):
    """ Computes the Dunn index for a set of descendant nodes.

    ARGUMENTS:

    clusters: iterable of descendant clusters. It is unpacked and
      resolved through _translate_nodes() relative to this node
      (presumably node instances or node names are accepted --
      confirm against _translate_nodes).

    fdist: distance function handed to the index calculation. When
      None, the function stored in this node's _fdist attribute is
      used instead.

    RETURNS:

    Whatever clustvalidation.get_dunn_index() returns for the
    resolved nodes.
    """
    # An explicitly supplied function wins; otherwise fall back to the
    # one attached to this node.
    distance_fn = self._fdist if fdist is None else fdist
    resolved = _translate_nodes(self, *clusters)
    return clustvalidation.get_dunn_index(distance_fn, *resolved)
218
220 """ This internal function updates the mean profile
221 associated to an internal node. """
222
223
224 self._profile, self._std_profile = clustvalidation.get_avg_profile(self)
225
226
227
228 ClusterTree = ClusterNode
229