Package ete2 :: Package clustering :: Module clustertree
[hide private]
[frames | no frames]

Source Code for Module ete2.clustering.clustertree

  1  __VERSION__="ete2-2.0rev90"  
  2  # #START_LICENSE########################################################### 
  3  # 
  4  # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.   
  5  # email: jhcepas@gmail.com 
  6  # 
  7  # This file is part of the Environment for Tree Exploration program (ETE).  
  8  # http://ete.cgenomics.org 
  9  #   
 10  # ETE is free software: you can redistribute it and/or modify it 
 11  # under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation, either version 3 of the License, or 
 13  # (at your option) any later version. 
 14  #   
 15  # ETE is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  #   
 20  # You should have received a copy of the GNU General Public License 
 21  # along with ETE.  If not, see <http://www.gnu.org/licenses/>. 
 22  # 
 23  # #END_LICENSE############################################################# 
 24   
 25  from sys import stderr 
 26  import clustvalidation 
 27  from numpy import nan as NaN # Missing values are saved as NaN values 
 28  from ete2.coretype.tree import _translate_nodes 
 29  from ete2 import TreeNode, ArrayTable 
 30   
 31  __all__ = ["ClusterNode", "ClusterTree"] 
 32   
class ClusterNode(TreeNode):
    """ Creates a new Cluster Tree object, which is a collection
    of ClusterNode instances connected in a hierarchical way, and
    representing a clustering result.

    A newick file or string can be passed as the first argument. An
    ArrayTable file or instance can be passed as a second argument.

    Examples:
        t1 = ClusterTree() # creates an empty tree
        t2 = ClusterTree( '(A:1,(B:1,(C:1,D:1):0.5):0.5);' )
        t3 = ClusterTree( '/home/user/myNewickFile.txt' )
    """
47 - def _set_forbidden(self, value):
48 raise ValueError, "This attribute can not be manually set."
49
50 - def _get_intra(self):
51 if self._silhouette == None: 52 self.get_silhouette() 53 return self._intracluster_dist
54
55 - def _get_inter(self):
56 if self._silhouette == None: 57 self.get_silhouette() 58 return self._intercluster_dist
59
60 - def _get_silh(self):
61 if self._silhouette == None: 62 self.get_silhouette() 63 return self._silhouette
64
65 - def _get_prof(self):
66 if self._profile is None: 67 self._calculate_avg_profile() 68 return self._profile
69
70 - def _get_std(self):
71 if self._std_profile is None: 72 self._calculate_avg_profile() 73 return self._std_profile
74
75 - def _set_profile(self, value):
76 self._profile = value
# Public, lazily computed attributes. Only `profile` accepts
# assignment; every other setter raises ValueError.
intracluster_dist = property(_get_intra, _set_forbidden)
intercluster_dist = property(_get_inter, _set_forbidden)
silhouette = property(_get_silh, _set_forbidden)
profile = property(_get_prof, _set_profile)
deviation = property(_get_std, _set_forbidden)
def __init__(self, newick=None, text_array=None,
             fdist=clustvalidation.default_dist):
    """ Builds a cluster node, optionally loading a newick tree and
    linking it to array data.

    ARGUMENTS:

      newick: value handed to TreeNode.__init__ (a newick file or
        string, or None for an empty node).

      text_array: when true-ish, passed to link_to_arraytable().

      fdist: distance function installed with
        set_distance_function(); only applied when `newick` is given.
    """
    # Default dist is spearman_dist when scipy module is loaded
    # otherwise, it is set to euclidean_dist.

    # Basic tree set-up; this also loads the newick (if any)
    TreeNode.__init__(self, newick)

    # Values computed on demand by the property getters
    self._fdist = None
    self._silhouette = None
    self._intercluster_dist = None
    self._intracluster_dist = None
    self._profile = None
    self._std_profile = None

    # Cluster specific features
    for feature in ("intercluster_dist", "intracluster_dist",
                    "silhouette", "profile", "deviation"):
        self.features.add(feature)

    # Initialize tree with array data
    if text_array:
        self.link_to_arraytable(text_array)

    if newick:
        self.set_distance_function(fdist)
111
def set_distance_function(self, fn):
    """ Sets the distance function used to calculate cluster
    distances and silhouette indexes.

    The function is stored on every node visited by traverse(), and
    the silhouette and inter/intra-cluster distances cached on each of
    those nodes are discarded, so they are recomputed with the new
    function the next time they are requested.

    ARGUMENTS:

      fn: a python function accepting two arrays (numpy) as
      arguments.

    EXAMPLE:

      # A simple absolute-difference distance
      my_dist_fn = lambda x,y: abs(x-y)
      tree.set_distance_function(my_dist_fn)

    """
    # traverse() is relied on to visit this node as well; that is the
    # only way self._fdist gets set.
    for n in self.traverse():
        n._fdist = fn
        # Drop the cache on each node, not only on self: a descendant
        # must not keep values computed with the previous function.
        n._silhouette = None
        n._intercluster_dist = None
        n._intracluster_dist = None
133 167
def iter_leaf_profiles(self):
    """ Generator yielding, for each leaf given by iter_leaves(), the
    first item of what that leaf's get_profile() returns. """
    # NOTE(review): get_profile() is not defined in the visible part of
    # this class -- confirm that the leaves actually provide it.
    for leaf in self.iter_leaves():
        leaf_profile = leaf.get_profile()
        yield leaf_profile[0]
173
def get_leaf_profiles(self):
    """ Builds a list with, for each leaf given by iter_leaves(), the
    first item of what that leaf's get_profile() returns. """
    # NOTE(review): get_profile() is not defined in the visible part of
    # this class -- confirm that the leaves actually provide it.
    profiles = []
    for leaf in self.iter_leaves():
        profiles.append(leaf.get_profile()[0])
    return profiles
178
def get_silhouette(self, fdist=None):
    """ Calculates the node's silhouette value with a given distance
    function and caches the result on the node.

    ARGUMENTS:

      fdist: distance function to use. When None, the function
        stored on the node (see set_distance_function) is used.

    RETURNS:

      the tuple (silhouette, intracluster_dist, intercluster_dist),
      as produced by clustvalidation.get_silhouette_width(). The same
      values are then available through the node properties of the
      same names.

    As stated by the original documentation, intracluster distances
    a(i) are calculated as the Centroid Diameter and intercluster
    distances b(i) as the Centroid linkage distance.

    ** Rousseeuw, P.J. (1987) Silhouettes: A graphical aid to the
    interpretation and validation of cluster analysis.
    J. Comput. Appl. Math., 20, 53-65.

    """
    dist_fn = self._fdist if fdist is None else fdist

    # Compute everything first, then refresh the cached values
    silhouette, intra, inter = \
        clustvalidation.get_silhouette_width(dist_fn, self)
    self._silhouette = silhouette
    self._intracluster_dist = intra
    self._intercluster_dist = inter

    return self._silhouette, self._intracluster_dist, self._intercluster_dist
208
def get_dunn(self, clusters, fdist=None):
    """ Calculates the Dunn index for the given set of descendant
    nodes.

    ARGUMENTS:

      clusters: the descendant nodes to evaluate; they are resolved
        through _translate_nodes() before being used.

      fdist: distance function to use. When None, the function
        stored on the node (see set_distance_function) is used.

    RETURNS:

      whatever clustvalidation.get_dunn_index() returns for them.
    """
    dist_fn = self._fdist if fdist is None else fdist
    cluster_nodes = _translate_nodes(self, *clusters)
    return clustvalidation.get_dunn_index(dist_fn, *cluster_nodes)
218
def _calculate_avg_profile(self):
    """ Internal helper refreshing the mean profile and the deviation
    profile stored on the node, as returned by
    clustvalidation.get_avg_profile(). """
    mean_profile, std_profile = clustvalidation.get_avg_profile(self)
    self._profile = mean_profile
    self._std_profile = std_profile
# Cosmetic alias: ClusterTree is the very same class as ClusterNode,
# offered under a second name.
ClusterTree = ClusterNode