Package ete2 :: Package clustering :: Module clustertree
[hide private]
[frames | no frames]

Source Code for Module ete2.clustering.clustertree

  1  __VERSION__="ete2-2.0rev104"  
  2  # #START_LICENSE########################################################### 
  3  # 
  4  # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.   
  5  # email: jhcepas@gmail.com 
  6  # 
  7  # This file is part of the Environment for Tree Exploration program (ETE).  
  8  # http://ete.cgenomics.org 
  9  #   
 10  # ETE is free software: you can redistribute it and/or modify it 
 11  # under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation, either version 3 of the License, or 
 13  # (at your option) any later version. 
 14  #   
 15  # ETE is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  #   
 20  # You should have received a copy of the GNU General Public License 
 21  # along with ETE.  If not, see <http://www.gnu.org/licenses/>. 
 22  # 
 23  # #END_LICENSE############################################################# 
 24   
 25  from sys import stderr 
 26  import numpy 
 27  import clustvalidation 
 28  from numpy import nan as NaN # Missing values are saved as NaN values 
 29  from ete2.coretype.tree import _translate_nodes 
 30  from ete2 import TreeNode, ArrayTable 
 31   
 32  __all__ = ["ClusterNode", "ClusterTree"] 
 33   
34 -class ClusterNode(TreeNode):
35 """ Creates a new Cluster Tree object, which is a collection 36 of ClusterNode instances connected in a hierarchical way, and 37 representing a clustering result. 38 39 a newick file or string can be passed as the first argument. An 40 ArrayTable file or instance can be passed as a second argument. 41 42 Examples: 43 t1 = Tree() # creates an empty tree 44 t2 = Tree( '(A:1,(B:1,(C:1,D:1):0.5):0.5);' ) 45 t3 = Tree( '/home/user/myNewickFile.txt' ) 46 """ 47
48 - def _set_forbidden(self, value):
49 raise ValueError, "This attribute can not be manually set."
50
51 - def _get_intra(self):
52 if self._silhouette == None: 53 self.get_silhouette() 54 return self._intracluster_dist
55
56 - def _get_inter(self):
57 if self._silhouette == None: 58 self.get_silhouette() 59 return self._intercluster_dist
60
61 - def _get_silh(self):
62 if self._silhouette == None: 63 self.get_silhouette() 64 return self._silhouette
65
66 - def _get_prof(self):
67 if self._profile is None: 68 self._calculate_avg_profile() 69 return self._profile
70
71 - def _get_std(self):
72 if self._std_profile is None: 73 self._calculate_avg_profile() 74 return self._std_profile
75
76 - def _set_profile(self, value):
77 self._profile = value
# Public cluster attributes. Each getter computes its value on first
# access and then serves the cached one. Only ``profile`` accepts
# assignment; the others raise ValueError through _set_forbidden.
intracluster_dist = property(_get_intra, _set_forbidden)
intercluster_dist = property(_get_inter, _set_forbidden)
silhouette = property(_get_silh, _set_forbidden)
profile = property(_get_prof, _set_profile)
deviation = property(_get_std, _set_forbidden)
def __init__(self, newick=None, text_array=None,
             fdist=clustvalidation.default_dist):
    """ Builds a cluster node, optionally loading a tree and its
    array data.

    ARGUMENTS:

      newick: passed unchanged to TreeNode.__init__.

      text_array: when true-ish, it is handed to
        link_to_arraytable().

      fdist: distance function installed through
        set_distance_function(). According to the original note,
        the default is spearman_dist when the scipy module is
        loaded, and euclidean_dist otherwise.

    The distance function is only installed when a newick is given;
    otherwise ``_fdist`` stays None.
    """
    # Basic tree features; also loads the newick (if any)
    TreeNode.__init__(self, newick)

    # No distance function and no cached validation values yet
    self._fdist = None
    self._silhouette = None
    self._intercluster_dist = None
    self._intracluster_dist = None
    self._profile = None
    self._std_profile = None

    # Cluster specific features
    for feature_name in ("intercluster_dist",
                         "intracluster_dist",
                         "silhouette",
                         "profile",
                         "deviation"):
        self.features.add(feature_name)

    # Initialize tree with array data
    if text_array:
        self.link_to_arraytable(text_array)

    if newick:
        self.set_distance_function(fdist)
112
def set_distance_function(self, fn):
    """ Sets the distance function used to calculate cluster
    distances and silhouette index.

    ARGUMENTS:

      fn: a python function accepting two arrays (numpy) as
        arguments.

    EXAMPLE:

      # A simple distance function
      my_dist_fn = lambda x,y: abs(x-y)
      tree.set_distance_function(my_dist_fn)

    """
    for node in self.traverse():
        node._fdist = fn
        # Reset the cached values so that the property getters
        # recompute them on their next access.
        node._silhouette = node._intercluster_dist = \
            node._intracluster_dist = None
134 174
def iter_leaf_profiles(self):
    """ Returns an iterator over the profiles associated to the
    leaves under this node.

    For each leaf yielded by iter_leaves(), the first item of
    leaf.get_profile() is produced.
    """
    for leaf in self.iter_leaves():
        leaf_data = leaf.get_profile()
        yield leaf_data[0]
180
def get_leaf_profiles(self):
    """ Returns the list of the profiles associated to the leaves
    under this node.

    For each leaf yielded by iter_leaves(), the first item of
    leaf.get_profile() is collected, in iteration order.
    """
    profiles = []
    for leaf in self.iter_leaves():
        profiles.append(leaf.get_profile()[0])
    return profiles
185
def get_silhouette(self, fdist=None):
    """ Calculates the node's silhouette value by using a given
    distance function, together with its intra- and inter-cluster
    distances.

    ARGUMENTS:

      fdist: distance function to use. When None, the function
        previously set on this node (``_fdist``) is used.

    RETURNS:

      The tuple (silhouette, intracluster_dist, intercluster_dist).

    The three values are also cached in the node and exposed through
    the following properties:
       - node.silhouette
       - node.intracluster_dist
       - node.intercluster_dist

    The computation itself is delegated to
    clustvalidation.get_silhouette_width(). According to the original
    documentation of this method:

    intracluster distances a(i) are calculated as the Centroid
    Diameter

    intercluster distances b(i) are calculated as the Centroid
    linkage distance

    ** Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the
    interpretation and validation of cluster analysis.
    J. Comput. Appl. Math., 20, 53-65.

    """
    dist_fn = self._fdist if fdist is None else fdist

    silhouette, intra_dist, inter_dist = \
        clustvalidation.get_silhouette_width(dist_fn, self)

    # Update the cached values
    self._silhouette = silhouette
    self._intracluster_dist = intra_dist
    self._intercluster_dist = inter_dist

    # And return them
    return self._silhouette, self._intracluster_dist, self._intercluster_dist
215
def get_dunn(self, clusters, fdist=None):
    """ Calculates the Dunn index for the given set of descendant
    nodes.

    ARGUMENTS:

      clusters: iterable of clusters; it is unpacked and resolved
        through _translate_nodes() relative to this node.

      fdist: distance function to use. When None, the function
        previously set on this node (``_fdist``) is used.

    RETURNS:

      Whatever clustvalidation.get_dunn_index() returns for the
      resolved nodes.
    """
    dist_fn = self._fdist if fdist is None else fdist
    cluster_nodes = _translate_nodes(self, *clusters)
    return clustvalidation.get_dunn_index(dist_fn, *cluster_nodes)
225
def _calculate_avg_profile(self):
    """ Internal function that refreshes the cached mean profile
    (``_profile``) and its companion ``_std_profile`` from the pair
    returned by clustvalidation.get_avg_profile(). """
    profile, std_profile = clustvalidation.get_avg_profile(self)

    # Update the cached values
    self._profile = profile
    self._std_profile = std_profile


# Cosmetic alias: ClusterTree is the very same class as ClusterNode, so a
# whole tree and any of its nodes share one type.
ClusterTree = ClusterNode