1 __VERSION__="ete2-2.0rev90"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 """
26 The 'seqgroup' module provides methods and classes to operate with
27 Multiple Sequence Files, including Multiple Sequence Alignments.
28
29 Currently, Fasta, Phylip sequential and Phylip interleaved formats are
30 supported.
31 """
32
33 from ete2.parser.fasta import read_fasta, write_fasta
34 from ete2.parser.phylip import read_phylip, write_phylip
35
36 __all__ = ["SeqGroup"]
37
39 """
40 SeqGroup class can be used to store a set of sequences (aligned
41 or not).
42
43 CONSTRUCTOR ARGUMENTS:
44 ======================
45
46 * sequences: Path to the file containing the sequences or,
47 alternatively, the text string containing the same information.
48
49 * format (optional): the format in which sequences are encoded. Currently
50 supported formats are: "fasta", "phylip" (phylip sequential)
51 and "iphylip" (phylip interleaved)
52
53 RETURNS:
54 ========
55 A SeqGroup object to operate with sequences.
56
57 EXAMPLES:
58 =========
59 msf = ">seq1\\nAAAAAAAAAAA\\n>seq2\\nTTTTTTTTTTTTT\\n"
60 seqs = SeqGroup(msf, format="fasta")
61 print seqs.get_seq("seq1")
62 """
63
65 return len(self.id2seq)
66
68 return item in self.name2id
69
72
75
def __init__(self, sequences=None, format="fasta"):
    """Create a sequence collection, optionally loading `sequences`.

    Arguments:
      * sequences: input handed unchanged to the reader registered for
        `format` (documented for this class as a path to a sequence
        file or the text holding the same information). When None, the
        collection starts empty and no reader is called.
      * format: case-insensitive key into `self.parsers`; one of
        "fasta", "phylip" or "iphylip".

    Raises ValueError when `sequences` is given and `format` is not a
    registered key.
    """
    # format name -> [reader, writer, extra keyword arguments for both]
    self.parsers = {
        "fasta": [read_fasta, write_fasta, {}],
        "phylip": [read_phylip, write_phylip, {"interleaved": False}],
        "iphylip": [read_phylip, write_phylip, {"interleaved": True}],
    }

    # Lookup tables for the stored entries. The reader receives this
    # object through `obj=self` (presumably to fill these in -- confirm
    # against the parser modules).
    self.id2name = {}
    self.name2id = {}
    self.id2comment = {}
    self.id2seq = {}

    if sequences is None:
        return

    format = format.lower()
    if format not in self.parsers:
        # Call-style raise: valid on Python 2 and Python 3 alike, unlike
        # the old `raise ValueError, "..."` statement form.
        raise ValueError("Unsupported format: [%s]" % format)

    reader, _, reader_args = self.parsers[format]
    reader(sequences, obj=self, **reader_args)
96
def write(self, format="fasta", outfile=None):
    """Serialize the collection with the writer registered for `format`.

    Arguments:
      * format: case-insensitive key into `self.parsers` (default
        "fasta").
      * outfile: passed straight through to the writer; per the original
        documentation, when given the result is written to that path.

    Returns whatever the selected writer returns (documented as the text
    representation of the sequences).

    Raises ValueError when `format` is not a registered key.
    """
    format = format.lower()
    if format not in self.parsers:
        # Call-style raise works on Python 2 and 3; the message now
        # matches the spelling used by the constructor.
        raise ValueError("Unsupported format: [%s]" % format)

    _, writer, writer_args = self.parsers[format]
    return writer(self, outfile, **writer_args)
109
111 """ Returns an iterator over all sequences in the
112 collection. Each item is a tuple with the sequence name,
113 sequence, and sequence comments """
114 for i, seq in self.id2seq.iteritems():
115 yield self.id2name[i], seq, self.id2comment.get(i, [])
116
118 """ Returns the sequence associated to a given entry name."""
119 return self.id2seq[self.name2id[name]]
120
122 """ Returns the list of entries currently stored."""
123 keys = self.id2seq.keys()
124 seqs = self.id2seq.values()
125 comments = [self.id2comment.get(x, []) for x in keys]
126 names = map(lambda x: self.id2name[x], keys)
127 return zip(names, seqs, comments)
128