1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Manage the OmegaT glossary format
22
23 OmegaT glossary format is used by the
24 U{OmegaT<http://www.omegat.org/en/omegat.html>} computer aided
25 translation tool.
26
27 It is a bilingual base class derived format with L{OmegaTFile}
28 and L{OmegaTUnit} providing file and unit level access.
29
30 Format Implementation
31 =====================
32 The OmegaT glossary format is a simple Tab Separated Value (TSV) file
33 with the columns: source, target, comment.
34
35 The dialect of the TSV files is specified by L{OmegaTDialect}.
36
37 Encoding
38 --------
39 The files are either UTF-8 or encoded using the system default. UTF-8
40 encoded files use the .utf8 extension while system encoded files use
41 the .tab extension.
42 """
43
44 import csv
45 import locale
46 import sys
47 from translate.storage import base
48
49 OMEGAT_FIELDNAMES = ["source", "target", "comment"]
50 """Field names for an OmegaT glossary unit"""
51
52
67 csv.register_dialect("omegat", OmegaTDialect)
68
69
71 """An OmegaT glossary unit"""
72
78
80 """Get the dictionary of values for an OmegaT line"""
81 return self._dict
82
84 """Set the dictionary of values for an OmegaT line
85
86 @param newdict: a new dictionary with OmegaT line elements
87 @type newdict: Dict
88 """
89
90 self._dict = newdict
91 dict = property(getdict, setdict)
92
94 if key not in self._dict:
95 return None
96 elif self._dict[key]:
97 return self._dict[key].decode('utf-8')
98 else:
99 return ""
100
102 if newvalue is None:
103 self._dict[key] = None
104 if isinstance(newvalue, unicode):
105 newvalue = newvalue.encode('utf-8')
106 if not key in self._dict or newvalue != self._dict[key]:
107 self._dict[key] = newvalue
108
111
def addnote(self, text, origin=None, position="append"):
    """Store a note in this unit's 'comment' field.

    When position is "append" and the unit already has a non-empty
    comment, text is added after the existing comment on a new line.
    In every other case text replaces the comment outright.

    @param text: the note text to store
    @param origin: accepted for interface compatibility; not used here
    @param position: "append" to add to an existing comment, anything
    else to overwrite it
    """
    existing = self._get_field('comment')
    if position != "append" or existing is None or existing == u'':
        # Nothing to append to, or a replacement was requested.
        self._set_field('comment', text)
        return
    self._set_field('comment', existing + '\n' + text)
118
121
124
126 self._rich_source = None
127 return self._set_field('source', newsource)
128 source = property(getsource, setsource)
129
132
134 self._rich_target = None
135 return self._set_field('target', newtarget)
136 target = property(gettarget, settarget)
137
139 self._dict['target-lang'] = newlang
140 targetlang = property(None, settargetlang)
141
143 return str(self._dict)
144
146 return bool(self._dict.get('target', None))
147
148
150 """An OmegaT glossary file"""
151 Name = _("OmegaT Glossary")
152 Mimetypes = ["application/x-omegat-glossary"]
153 Extensions = ["utf8"]
154
156 """Construct an OmegaT glossary, optionally reading in from
157 inputfile."""
158 self.UnitClass = unitclass
159 base.TranslationStore.__init__(self, unitclass=unitclass)
160 self.filename = ''
161 self.extension = ''
162 self._encoding = self._get_encoding()
163 if inputfile is not None:
164 self.parse(inputfile)
165
168
170 """Parse the given file or file source string"""
171 if hasattr(input, 'name'):
172 self.filename = input.name
173 elif not getattr(self, 'filename', ''):
174 self.filename = ''
175 if hasattr(input, "read"):
176 tmsrc = input.read()
177 input.close()
178 input = tmsrc
179 try:
180 input = input.decode(self._encoding).encode('utf-8')
181 except:
182 raise ValueError("OmegaT files are either UTF-8 encoded or use the default system encoding")
183 lines = csv.DictReader(input.split("\n"), fieldnames=OMEGAT_FIELDNAMES,
184 dialect="omegat")
185 for line in lines:
186 newunit = OmegaTUnit()
187 newunit.dict = line
188 self.addunit(newunit)
189
191 output = csv.StringIO()
192 writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES,
193 dialect="omegat")
194 unit_count = 0
195 for unit in self.units:
196 if unit.istranslated():
197 unit_count += 1
198 writer.writerow(unit.dict)
199 if unit_count == 0:
200 return ""
201 output.reset()
202 decoded = "".join(output.readlines()).decode('utf-8')
203 try:
204 return decoded.encode(self._encoding)
205 except UnicodeEncodeError:
206 return decoded.encode('utf-8')
207
208
210 """An OmegaT glossary file in the default system encoding"""
211 Name = _("OmegaT Glossary")
212 Mimetypes = ["application/x-omegat-glossary"]
213 Extensions = ["tab"]
214
216 return locale.getdefaultlocale()[1]
217