# Version tag of the package this module belongs to.
__VERSION__ = "ete2-2.0rev90"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 import re
26 import os
27
# Public names exported by a star-import of this module.
__all__ = [
    "read_newick",
    "write_newick",
    "print_supported_formats",
]
29
30
# Characters that are replaced by "_" when attribute values are written out.
# The string is spliced into a regex character class ("[" + ... + "]"), so
# the square brackets are backslash-escaped.  The backslashes are written
# explicitly (instead of relying on Python passing unknown escapes through)
# so the literal does not trigger "invalid escape sequence" warnings; the
# resulting value is unchanged.
_ILEGAL_NEWICK_CHARS = ":;(),\\[\\]\t\n\r="
# An NHX comment block: "[&&NHX:" followed by anything up to the next "]".
_NHX_RE = r"\[&&NHX:[^\]]*\]"
# An optionally signed decimal number (no exponent notation).
_FLOAT_RE = r"[+-]?\d+\.?\d*"
# A node name: one or more characters other than newick delimiters.
_NAME_RE = r"[^():,;\[\]]+"

# Default node attribute values.  They are not referenced by the parsing
# code visible in this part of the module.
DEFAULT_DIST = 1.0
DEFAULT_NAME = ''
DEFAULT_SUPPORT = 1.0
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Newick format table, keyed by format id.
#
# Every entry holds four field specifications, in this order:
#   [leaf field 1, leaf field 2, internal field 1, internal field 2]
# and every specification is a three-item list:
#   [node attribute name, converter (str / float / None), flexible]
# A converter of None means the field is not used; flexible=True makes the
# field optional when a node is parsed.
NW_FORMAT = {
    0: [
        ["name", str, True], ["dist", float, True],
        ["support", float, True], ["dist", float, True],
    ],
    1: [
        ["name", str, True], ["dist", float, True],
        ["name", str, True], ["dist", float, True],
    ],
    2: [
        ["name", str, False], ["dist", float, False],
        ["support", float, False], ["dist", float, False],
    ],
    3: [
        ["name", str, False], ["dist", float, False],
        ["name", str, False], ["dist", float, False],
    ],
    4: [
        ["name", str, False], ["dist", float, False],
        [None, None, False], [None, None, False],
    ],
    5: [
        ["name", str, False], ["dist", float, False],
        [None, None, False], ["dist", float, False],
    ],
    6: [
        ["name", str, False], [None, None, False],
        [None, None, False], ["dist", float, False],
    ],
    7: [
        ["name", str, False], ["dist", float, False],
        ["name", str, False], [None, None, False],
    ],
    8: [
        ["name", str, False], [None, None, False],
        ["name", str, False], [None, None, False],
    ],
    9: [
        ["name", str, False], [None, None, False],
        [None, None, False], [None, None, False],
    ],
    100: [
        [None, None, False], [None, None, False],
        [None, None, False], [None, None, False],
    ],
}
85
86
130
131
142
156
164
class NewickError(Exception):
    """Exception class designed for NewickIO errors.

    Raised by the reader functions of this module when the input is not a
    usable newick string or file.
    """
168
def read_newick(newick, root_node=None, format=0):
    """Read a newick tree from a string or a file and return its root node.

    A previously existing node object can be passed as ``root_node``; the
    parsed tree is then built below it, so every new child is created through
    that node's own ``add_child`` (this allows working with custom TreeNode
    objects).  The same mechanism can be used to concatenate several tree
    structures under one root.

    :param newick: path of an existing file containing a newick tree, or
        the newick text itself.  If the value names an existing path, the
        file content is used; otherwise the value is parsed as newick text.
    :param root_node: node to use as the root of the parsed tree.  When
        ``None``, a new ``ete2.coretype.tree.TreeNode`` is created.
    :param format: key of ``NW_FORMAT`` selecting which node fields are
        expected in the newick text.
    :returns: ``root_node`` populated with the parsed tree.
    :raises NewickError: if ``newick`` is not a string, or if the text does
        not start with ``(`` and end with ``;`` once stripped.
    """
    if root_node is None:
        # Imported only when needed, as the original code does.
        from ete2.coretype.tree import TreeNode
        root_node = TreeNode()

    if not isinstance(newick, str):
        raise NewickError(
            "'newick' argument must be either a filename or a newick string.")

    if os.path.exists(newick):
        # Plain "r" rather than "rU": the "U" flag is rejected by recent
        # Python versions, and "\r" characters are stripped from the text
        # by the string parser anyway.  The context manager guarantees the
        # file handle is closed.
        with open(newick, "r") as handle:
            nw = handle.read()
    else:
        nw = newick

    nw = nw.strip()
    if not nw.startswith('(') or not nw.endswith(';'):
        raise NewickError(
            'Unexisting tree file or Malformed newick tree structure.')
    return _read_newick_from_string(nw, root_node, format)
200
def _read_newick_from_string(nw, root_node, format):
    """Read a newick string in the New Hampshire format.

    The text is split on "(": every resulting chunk opens one internal
    node.  Inside a chunk, comma-separated pieces are children of that
    node, and every ")" found in a piece closes the current internal node
    and moves one level up.

    :param nw: newick text.
    :param root_node: node used for the first opened internal node.
    :param format: key of ``NW_FORMAT`` passed on to ``_read_node_data``.
    :returns: ``root_node``.
    :raises NewickError: if the number of "(" and ")" differ, or if
        ``_read_node_data`` rejects a node.
    """
    if nw.count('(') != nw.count(')'):
        raise NewickError('Parentheses do not match. Broken tree structure')

    # Line breaks and tabs are dropped before parsing (one pass instead of
    # three separate substitutions).
    nw = re.sub("[\n\r\t]", "", nw)

    current_parent = None
    for chunk in nw.split("(")[1:]:
        # The first chunk belongs to the root; later chunks open a new
        # internal node below the current one.
        if current_parent is None:
            current_parent = root_node
        else:
            current_parent = current_parent.add_child()

        possible_leaves = chunk.split(",")
        last_index = len(possible_leaves) - 1
        for i, leaf in enumerate(possible_leaves):
            # An empty trailing piece means the next sibling is an internal
            # node, which starts in the next "(" chunk.
            if i == last_index and leaf.strip() == '':
                continue

            # Text before the first ")" describes a leaf; each following
            # part describes the internal node being closed.
            closing_nodes = leaf.split(")")
            _read_node_data(closing_nodes[0], current_parent, "leaf", format)

            for closing_internal in closing_nodes[1:]:
                if closing_internal.strip() == ";":
                    continue
                _read_node_data(closing_internal, current_parent,
                                "internal", format)
                current_parent = current_parent.up
    return root_node
261
def _parse_extra_features(node, NHX_string):
    """Read a node's extra data from its NHX string.

    NHX uses this format: ``[&&NHX:prop1=value1:prop2=value2]``.  Every
    ``name=value`` pair is stored on ``node`` through ``node.add_feature``;
    values are kept as strings.

    :param node: object providing ``add_feature(name, value)``.
    :param NHX_string: the NHX block, with or without its brackets.
    :raises ValueError: if a field is not exactly one ``name=value`` pair.
        Fields that precede the malformed one have already been added.
    """
    payload = NHX_string.replace("[&&NHX:", "").replace("]", "")
    for field in payload.split(":"):
        parts = field.split("=")
        if len(parts) != 2:
            # Report the offending field in the exception itself instead of
            # printing debugging output to stdout.
            raise ValueError(
                "Malformed NHX field %r in %r: expected 'name=value'"
                % (field, payload))
        node.add_feature(parts[0], parts[1])
274
def _read_node_data(subnw, current_node, node_type, format):
    """Read one node from a subpart of the original newick tree.

    For ``node_type == "leaf"`` a new child is added to ``current_node``
    and filled in; for any other value ``current_node`` itself is filled
    in.  Which attributes are read, and how they are converted, comes from
    ``NW_FORMAT[format]`` (entries 0 and 1 for leaves, 2 and 3 otherwise).

    :param subnw: newick fragment describing the node.
    :param current_node: parent (leaf case) or target (internal case) node.
    :param node_type: ``"leaf"`` or anything else for an internal node.
    :param format: key of ``NW_FORMAT``.
    :raises NewickError: if the fragment does not match the expected layout.
    """
    if node_type == "leaf":
        node = current_node.add_child()
        first_spec = NW_FORMAT[format][0]
        second_spec = NW_FORMAT[format][1]
    else:
        node = current_node
        first_spec = NW_FORMAT[format][2]
        second_spec = NW_FORMAT[format][3]

    container1, converter1, flexible1 = first_spec[0], first_spec[1], first_spec[2]
    container2, converter2, flexible2 = second_spec[0], second_spec[1], second_spec[2]

    value_patterns = {str: _NAME_RE, float: _FLOAT_RE}

    def field_pattern(converter, prefix, flexible):
        # An unused field (converter None) becomes an empty group so group
        # numbering stays fixed; a flexible field is made optional.
        if converter is None:
            pattern = "()"
        else:
            pattern = "(" + prefix + value_patterns[converter] + ")"
        if flexible:
            pattern += "?"
        return pattern

    # The second field is introduced by ":" in the newick text.
    first_match = field_pattern(converter1, "", flexible1)
    second_match = field_pattern(converter2, ":", flexible2)

    matcher = r"%s\s*%s\s*(%s)?" % (first_match, second_match, _NHX_RE)
    found = re.match(matcher, subnw)
    if not found:
        raise NewickError("Unexpected leaf node format:\n\t" + subnw[0:50])

    groups = found.groups()
    if groups[0]:
        node.add_feature(container1, converter1(groups[0].strip()))

    if groups[1]:
        # Drop the leading ":" captured together with the value.
        node.add_feature(container2, converter2(groups[1][1:].strip()))

    if groups[2] is not None and groups[2].startswith("[&&NHX"):
        _parse_extra_features(node, groups[2])
    return
331
def write_newick(node, features=[], format=1, _is_root=True):
    """Recursively read a tree structure and return its NHX representation.

    :param node: node to serialise; must provide ``children`` and ``up``.
    :param features: passed unchanged to ``_get_features_string``, which
        treats an empty list as "all features of the node" and ``None`` as
        "no features".  The default list is never mutated here; it is kept
        as ``[]`` because replacing it with ``None`` would change that
        meaning.
    :param format: newick format id handed to ``format_node``.
    :param _is_root: internal flag; ``True`` only for the outermost call,
        which appends the closing ";".
    :returns: the newick string.  A node without children is returned
        without a trailing ";", even on the outermost call.
    """
    if not node.children:
        return (format_node(node, "leaf", format)
                + _get_features_string(node, features))

    # Joining the serialised children places the commas and the closing
    # parenthesis by position rather than by comparing node objects.
    serialised = [
        write_newick(child, features, format=format, _is_root=False)
        for child in node.children
    ]
    pieces = ["(", ",".join(serialised), ")"]
    # The root of the whole tree (no parent) carries no name/dist fields.
    if node.up is not None:
        pieces.append(format_node(node, "internal", format))
    pieces.append(_get_features_string(node, features))
    if _is_root:
        pieces.append(";")
    return "".join(pieces)
360
361
def _get_features_string(self, features=[]):
    """Build the extended newick (NHX) string with extra data about a node.

    :param self: node whose attributes are written.
    :param features: names of the attributes to include.  ``None`` selects
        nothing, an empty list selects everything in ``self.features``.
        Names the node has no attribute for are skipped.
    :returns: ``"[&&NHX:name=value:...]"``, or ``""`` when nothing was
        selected.  Characters of ``_ILEGAL_NEWICK_CHARS`` found in a value
        are replaced by ``"_"``.
    """
    if features is None:
        selected = []
    elif features == []:
        selected = self.features
    else:
        selected = features

    illegal_class = "[" + _ILEGAL_NEWICK_CHARS + "]"
    pairs = []
    for pr in selected:
        if not hasattr(self, pr):
            continue
        cleaned = re.sub(illegal_class, "_", str(getattr(self, pr)))
        pairs.append("%s=%s" % (pr, str(cleaned)))

    if not pairs:
        return ""
    return "[&&NHX:" + ":".join(pairs) + "]"
382