1 __VERSION__="ete2-2.0rev104"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 import numpy
26 from math import sqrt
27
# NOTE(review): the `def` line that opens this body is not present in this
# listing, so the function's name and full signature cannot be confirmed here.
# The body reads a single iterable, `values`.
29 """ Returns mean value discarding non finite values """
# Keep only the entries numpy reports as finite (NaN and +/-inf are dropped).
30 valid_values = []
31 for v in values:
32 if numpy.isfinite(v):
33 valid_values.append(v)
# Two values are returned, not just the mean: numpy.mean and numpy.std of the
# finite entries, in that order.
34 return numpy.mean(valid_values), numpy.std(valid_values)
35
def safe_mean_vector(vectors):
    """Return the per-position mean and standard deviation of ``vectors``.

    Non-finite entries (NaN, +inf, -inf) are left out of the statistics
    for the position they occupy.

    Parameters:
        vectors: non-empty sequence of indexable numeric sequences. The
            number of positions is taken from the first one, so every
            other vector must be at least that long.

    Returns:
        A ``(mean, std)`` pair. When only one vector is given, that vector
        is returned unchanged together with an all-zero numpy array of the
        same length. Otherwise both items are numpy arrays; a position
        with no finite value in any vector yields NaN in both.
    """
    # A single profile is its own average and has no spread.
    if len(vectors) == 1:
        return vectors[0], numpy.zeros(len(vectors[0]))

    length = len(vectors[0])

    means = []
    stds = []
    # range() instead of xrange(): same iteration, valid on Python 2 and 3.
    for pos in range(length):
        finite = [v[pos] for v in vectors if numpy.isfinite(v[pos])]
        means.append(numpy.mean(finite))
        stds.append(numpy.std(finite))
    return numpy.array(means), numpy.array(stds)
56
88
# NOTE(review): the `def` line that opens this body is not present in this
# listing, so the function's name and signature cannot be confirmed here.
# The body works on a single tree node, `node`.
90 """ This internal function updates the mean profile
91 associated to an internal node. """
92
# Internal node: rebuild its profile from the leaves below it, ignoring
# leaves whose `_profile` is None.
93 if not node.is_leaf():
94 leaf_vectors = [n._profile for n in node.get_leaves() \
95 if n._profile is not None]
96 if len(leaf_vectors)>0:
97 node._profile, node._std_profile = safe_mean_vector(leaf_vectors)
98 else:
# No leaf carries a profile, so the node gets none either.
99 node._profile, node._std_profile = None, None
100 return node._profile, node._std_profile
101 else:
# Leaf: its own profile is kept and the deviation is a list of zeros of the
# same length.
# NOTE(review): len(node._profile) raises TypeError if a leaf's `_profile`
# is None - confirm leaves always carry a profile when this runs.
102 node._std_profile = [0.0]*len(node._profile)
103 return node._profile, [0.0]*len(node._profile)
104
105
# NOTE(review): the `def` line that opens this body is not present in this
# listing, so the name and the parameter order/kinds cannot be confirmed here.
# The body uses `fdist` (called with two profiles) and `clusters` (measured
# with len() and sliced).
107 """
108 Returns the Dunn index for the given selection of nodes.
109
110 J.C. Dunn. Well separated clusters and optimal fuzzy
111 partitions. 1974. J.Cybern. 4. 95-104.
112
113 """
114
# Fewer than two clusters leaves no pair to compare, so this is rejected.
115 if len(clusters)<2:
116 raise ValueError, "At least 2 clusters are required"
117
# Intra-cluster term: twice the distance from each leaf profile to the profile
# of the cluster it belongs to; the largest of these becomes max_a.
118 intra_dist = []
119 for c in clusters:
120 for i in c.get_leaves():
# NOTE(review): this tests the leaf object itself, not its profile - confirm
# whether leaves without a profile were meant to be skipped here.
121 if i is not None:
122
123 a = fdist(i.profile, c.profile)*2
124 intra_dist.append(a)
125 max_a = numpy.max(intra_dist)
# Inter-cluster term: distance between the profiles of every unordered pair
# of clusters; the smallest of these becomes min_b.
126 inter_dist = []
127 for i, ci in enumerate(clusters):
128 for cj in clusters[i+1:]:
129
130 b = fdist(ci.profile, cj.profile)
131 inter_dist.append(b)
132 min_b = numpy.min(inter_dist)
133
# The index is min_b / max_a; a zero max_a yields 0.0 instead of dividing.
134 if max_a == 0.0:
135 D = 0.0
136 else:
137 D = min_b / max_a
138 return D
139
140
141
142
143
144
145
# NOTE(review): the `def` line that opens this body is not present in this
# listing, so the function's name cannot be confirmed here. The body takes two
# vectors, `v1` and `v2`.
# Identical vectors short-circuit to 0.0; otherwise the result is 1.0 minus
# the first item returned by stats.pearsonr.
# NOTE(review): `(v1 == v2).all()` needs an element-wise comparison result -
# presumably v1/v2 are numpy arrays; confirm against callers.
# `stats` is bound by the import block at the end of the file.
147 if (v1 == v2).all():
148 return 0.0
149 else:
150 return 1.0 - stats.pearsonr(list(v1),list(v2))[0]
151
def spearman_dist(v1, v2):
    """Return the Spearman-correlation distance between two vectors.

    The distance is ``1.0 - rho``, where ``rho`` is the first item
    returned by ``stats.spearmanr``. Identical vectors give ``0.0``
    without calling ``stats.spearmanr``.

    Parameters:
        v1, v2: numeric sequences of the same length (numpy arrays or
            plain lists/tuples).

    Returns:
        The distance as a float.
    """
    # numpy.array_equal also accepts plain sequences, whereas
    # ``(v1 == v2).all()`` fails on lists because ``==`` yields a bool.
    if numpy.array_equal(v1, v2):
        return 0.0
    return 1.0 - stats.spearmanr(list(v1), list(v2))[0]
157
163
# NOTE(review): the `def` line that opens this body is not present in this
# listing, so the function's name cannot be confirmed here - in particular it
# is not clear from here whether this is the `euclidean_dist` that the import
# block at the end of the file refers to. The body takes two vectors, `v1`
# and `v2`.
# Identical vectors short-circuit to 0.0.
165 if (v1 == v2).all():
166 return 0.0
# Sum the squared differences over the positions where both vectors hold a
# finite value, counting those positions in `valids`.
167 valids = 0
168 distance= 0.0
169 for i in xrange(len(v1)):
170 if numpy.isfinite(v1[i]) and numpy.isfinite(v2[i]):
171 valids += 1
172 d = v1[i]-v2[i]
173 distance += d*d
# No position is finite in both vectors: there is nothing to average.
174 if valids==0:
175 raise ValueError, "Cannot calculate values"
# The result is the mean of the squared differences; no square root is taken.
176 return distance/valids
177
# Choose the module-wide default distance function according to which
# `stats` module can be imported: scipy's first, then a standalone `stats`.
try:
    from scipy import stats
except ImportError:
    try:
        import stats
    except ImportError:
        # No `stats` module could be imported at all.
        default_dist = euclidean_dist
    else:
        default_dist = spearman_dist
else:
    default_dist = spearman_dist
188