# Version tag string carried by this module.
__VERSION__ = "ete2-2.0rev90"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 import numpy
26
def safe_mean(values):
    """Return the mean and standard deviation of the finite items of *values*.

    NOTE(review): the ``def`` line of this function is not visible in the
    source. The name ``safe_mean`` and the single ``values`` parameter are
    reconstructed from the body and from the sibling ``safe_mean_vector``;
    confirm against callers.

    :param values: iterable of numbers. Entries for which
        ``numpy.isfinite`` is false (NaN, +/-inf) are discarded.
    :returns: ``(mean, std)`` as computed by ``numpy.mean`` and
        ``numpy.std`` over the remaining values. When nothing finite is
        left both calls receive an empty list.
    """
    valid_values = [v for v in values if numpy.isfinite(v)]
    return numpy.mean(valid_values), numpy.std(valid_values)
34
def safe_mean_vector(vectors):
    """Return the position-wise mean profile, discarding non finite values.

    NOTE(review): the ``def`` line is not visible in the source; the name
    is taken from the call site in this module and the ``vectors``
    parameter from the body.

    :param vectors: non-empty sequence of equally indexed numeric
        sequences. The length of the first vector decides how many
        positions are processed.
    :returns: ``(mean, std)``. For a single input vector, that vector is
        returned unchanged together with an all-zero ``numpy`` array.
        Otherwise two ``numpy`` arrays holding, per position, the mean and
        standard deviation of the finite values found at that position.
    """
    if len(vectors) == 1:
        # A lone profile is its own mean and has no spread.
        return vectors[0], numpy.zeros(len(vectors[0]))

    length = len(vectors[0])

    means = []
    stds = []
    for pos in range(length):
        # Only finite observations contribute to this position.
        column = [v[pos] for v in vectors if numpy.isfinite(v[pos])]
        means.append(numpy.mean(column))
        stds.append(numpy.std(column))
    return numpy.array(means), numpy.array(stds)
55
87
def get_avg_profile(node):
    """Update and return the mean profile associated to *node*.

    This internal function updates the mean profile associated to an
    internal node.

    NOTE(review): the ``def`` line is not visible in the source; the name
    ``get_avg_profile`` is reconstructed and should be confirmed against
    callers. The ``node`` parameter name comes from the body.

    :param node: object exposing ``is_leaf()``, ``get_leaves()`` and the
        ``_profile`` / ``_std_profile`` attributes.
    :returns: ``(node._profile, node._std_profile)`` after the update.
        Both are ``None`` when no profile is available.
    """
    if node.is_leaf():
        # A leaf keeps its own profile; its deviation is zero everywhere.
        # Leaves without a profile are tolerated (the internal-node branch
        # below already expects them) instead of failing on ``len(None)``.
        if node._profile is None:
            node._std_profile = None
        else:
            node._std_profile = [0.0] * len(node._profile)
        return node._profile, node._std_profile

    # Internal node: average the profiles of all leaves that have one.
    leaf_vectors = [leaf._profile for leaf in node.get_leaves()
                    if leaf._profile is not None]
    if len(leaf_vectors) > 0:
        node._profile, node._std_profile = safe_mean_vector(leaf_vectors)
    else:
        node._profile, node._std_profile = None, None
    return node._profile, node._std_profile
103
104
def get_dunn_index(fdist, *clusters):
    """
    Returns the Dunn index for the given selection of nodes.

    J.C. Dunn. Well separated clusters and optimal fuzzy
    partitions. 1974. J.Cybern. 4. 95-104.

    NOTE(review): the ``def`` line is not visible in the source; the name
    and the ``(fdist, *clusters)`` signature are reconstructed from the
    body. To stay compatible with either calling convention, a single
    list/tuple of clusters is accepted as well.

    :param fdist: callable ``fdist(profile_a, profile_b)`` returning a
        distance.
    :param clusters: two or more objects exposing ``profile`` and
        ``get_leaves()``; every leaf must expose ``profile`` too.
    :returns: smallest distance between two cluster profiles divided by
        the largest doubled leaf-to-cluster-profile distance, or ``0.0``
        when that largest value is zero.
    :raises ValueError: if fewer than two clusters are given.
    """
    if len(clusters) == 1 and isinstance(clusters[0], (list, tuple)):
        # Called as get_dunn_index(fdist, [c1, c2, ...]).
        clusters = tuple(clusters[0])

    if len(clusters) < 2:
        raise ValueError("At least 2 clusters are required")

    # Within-cluster spread: twice the distance from each leaf to the
    # profile of the cluster it belongs to.
    intra_dist = []
    for cluster in clusters:
        for leaf in cluster.get_leaves():
            if leaf is not None:
                intra_dist.append(fdist(leaf.profile, cluster.profile) * 2)
    max_a = numpy.max(intra_dist)

    # Between-cluster separation: distance for every unordered pair.
    inter_dist = []
    for idx, ci in enumerate(clusters):
        for cj in clusters[idx + 1:]:
            inter_dist.append(fdist(ci.profile, cj.profile))
    min_b = numpy.min(inter_dist)

    if max_a == 0.0:
        # Avoid dividing by zero when every leaf sits on its cluster profile.
        return 0.0
    return min_b / max_a
138
139
140
141
142
143
144
def pearson_dist(v1, v2):
    """Return ``1 - r`` where ``r`` is the Pearson correlation of the inputs.

    NOTE(review): the ``def`` line is not visible in the source; the name
    ``pearson_dist`` is reconstructed by analogy with ``spearman_dist``
    and the ``v1`` / ``v2`` parameters from the body.

    Relies on the module-level ``stats`` name, which this module binds
    only when ``scipy`` can be imported.

    :param v1: first numeric vector (array or plain sequence).
    :param v2: second numeric vector, same length as *v1*.
    :returns: ``0.0`` when both vectors are element-wise identical,
        otherwise ``1.0 - stats.pearsonr(v1, v2)[0]``.
    """
    # array_equal also works for plain lists, unlike ``(v1 == v2).all()``.
    if numpy.array_equal(v1, v2):
        return 0.0
    return 1.0 - stats.pearsonr(v1, v2)[0]
150
def spearman_dist(v1, v2):
    """Return ``1 - rho`` where ``rho`` is the Spearman rank correlation.

    NOTE(review): the ``def`` line is not visible in the source; the name
    is taken from its use as ``default_dist`` in this module and the
    ``v1`` / ``v2`` parameters from the body.

    Relies on the module-level ``stats`` name, which this module binds
    only when ``scipy`` can be imported.

    :param v1: first numeric vector (array or plain sequence).
    :param v2: second numeric vector, same length as *v1*.
    :returns: ``0.0`` when both vectors are element-wise identical,
        otherwise ``1.0 - stats.spearmanr(v1, v2)[0]``.
    """
    # array_equal also works for plain lists, unlike ``(v1 == v2).all()``.
    if numpy.array_equal(v1, v2):
        return 0.0
    return 1.0 - stats.spearmanr(v1, v2)[0]
156
162
def euclidean_dist(v1, v2):
    """Return the mean squared difference over positions finite in both inputs.

    NOTE(review): the ``def`` line is not visible in the source; the name
    is taken from its use as the fallback ``default_dist`` in this module.
    No square root is applied: the value is the sum of squared differences
    divided by the number of usable positions.

    :param v1: first numeric vector (array or plain sequence).
    :param v2: second numeric vector.
    :returns: ``0.0`` when both vectors are element-wise identical,
        otherwise the mean of ``(a - b) ** 2`` over every position where
        both ``a`` and ``b`` are finite.
    :raises ValueError: if no position is finite in both vectors.
    """
    # array_equal also works for plain lists, unlike ``(v1 == v2).all()``.
    if numpy.array_equal(v1, v2):
        return 0.0

    valids = 0
    distance = 0.0
    for a, b in zip(v1, v2):
        # Positions with a missing/non-finite value on either side are skipped.
        if numpy.isfinite(a) and numpy.isfinite(b):
            valids += 1
            d = a - b
            distance += d * d
    if valids == 0:
        raise ValueError("Cannot calculate values")
    return distance / valids
176
# Pick the default distance function. ``stats`` is needed by the
# correlation-based distances, so fall back to ``euclidean_dist`` when
# scipy cannot be imported.
try:
    from scipy import stats
except ImportError:
    # Single-argument print calls behave the same on Python 2 and 3.
    print("'scipy' module is not found in your system.")
    print("Correlation based distances will not be avaliable.")
    default_dist = euclidean_dist
else:
    default_dist = spearman_dist
184