Package translate :: Package storage :: Module base
[hide private]
[frames | no frames]

Source Code for Module translate.storage.base

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2006-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Base classes for storage interfaces. 
 22   
 23  @organization: Zuza Software Foundation 
 24  @copyright: 2006-2009 Zuza Software Foundation 
 25  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 26  """ 
 27   
 28  try: 
 29      import cPickle as pickle 
 30  except ImportError: 
 31      import pickle 
 32  from exceptions import NotImplementedError 
 33  import translate.i18n 
 34  from translate.storage.placeables import StringElem, general, parse as rich_parse 
 35  from translate.misc.typecheck import accepts, Self, IsOneOf 
 36  from translate.misc.multistring import multistring 
 37   
 38   
def force_override(method, baseclass):
    """Forces derived classes to override method.

    @param method: A bound method (it must expose C{im_self} and C{im_class}).
    @param baseclass: The class that the method's owner is compared against.
    @raise NotImplementedError: If the class that C{method} is bound to is
        not C{baseclass} itself.
    """
    owner = method.im_self
    if type(owner) != type(baseclass):
        # im_self is not a class of the same kind as baseclass, so the class
        # has to be taken from im_class instead.
        owner = method.im_class
    # else: this is a classmethod and im_self already is the actual class

    if owner != baseclass:
        names = (owner.__name__, method.__name__, baseclass.__name__)
        raise NotImplementedError(
            "%s does not reimplement %s as required by %s" % names
        )
52 53
class ParseError(Exception):
    """Exception that wraps another exception.

    The wrapped exception is available as C{inner_exc}, and the string form
    of this error is the C{repr()} of that wrapped exception.
    """

    def __init__(self, inner_exc):
        """Wraps the given exception.

        @param inner_exc: The exception object to wrap.
        """
        # Hand the argument to Exception as well so that .args is populated.
        # Without it, copying or unpickling the error re-invokes __init__
        # with no arguments and fails with a TypeError.
        Exception.__init__(self, inner_exc)
        self.inner_exc = inner_exc

    def __str__(self):
        """Returns the C{repr()} of the wrapped exception."""
        return repr(self.inner_exc)
61 62
class TranslationUnit(object):
    """Base class for translation units.

    Our concept of a I{translation unit} is influenced heavily by XLIFF:
    U{http://www.oasis-open.org/committees/xliff/documents/xliff-specification.htm}

    As such most of the method- and variable names borrow from XLIFF terminology.

    A translation unit consists of the following:
      - A I{source} string. This is the original translatable text.
      - A I{target} string. This is the translation of the I{source}.
      - Zero or more I{notes} on the unit. Notes would typically be some
        comments from a translator on the unit, or some comments originating from
        the source code.
      - Zero or more I{locations}. Locations indicate where in the original
        source code this unit came from.
      - Zero or more I{errors}. Some tools (eg. L{pofilter <filters.pofilter>}) can run checks on
        translations and produce error messages.

    @group Source: *source*
    @group Target: *target*
    @group Notes: *note*
    @group Locations: *location*
    @group Errors: *error*
    """

    rich_parsers = []
    """A list of functions to use for parsing a string into a rich string tree."""

    def __init__(self, source):
        """Constructs a TranslationUnit containing the given source string."""
        self.notes = ""
        self._store = None
        self.source = source
        self._target = None
        self._rich_source = None
        self._rich_target = None

    def __eq__(self, other):
        """Compares two TranslationUnits.

        @type other: L{TranslationUnit}
        @param other: Another L{TranslationUnit}
        @rtype: Boolean
        @return: Returns True if the supplied TranslationUnit equals this unit.
            Objects without C{source} and C{target} attributes (for example
            C{None}) are never equal to a unit.
        """
        if not (hasattr(other, "source") and hasattr(other, "target")):
            # Let Python fall back to its default comparison instead of
            # raising AttributeError for things that are not units.
            return NotImplemented
        return self.source == other.source and self.target == other.target

    def __ne__(self, other):
        """Inverse of L{__eq__()}.

        Python 2 does not derive C{!=} from C{__eq__}, so it is spelled out
        here to keep the two operators consistent.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        """Converts to a string representation that can be parsed back using L{parsestring()}."""
        # no point in pickling store object, so let's hide it for a while.
        store = getattr(self, "_store", None)
        self._store = None
        try:
            return pickle.dumps(self)
        finally:
            # Put the store back even when pickling fails.
            self._store = store

    def rich_to_multistring(cls, elem_list):
        """Convert a "rich" string tree to a C{multistring}:

           >>> from translate.storage.placeables.interfaces import X
           >>> rich = [StringElem(['foo', X(id='xxx', sub=[' ']), 'bar'])]
           >>> TranslationUnit.rich_to_multistring(rich)
           multistring(u'foo bar')
        """
        return multistring([unicode(elem) for elem in elem_list])
    rich_to_multistring = classmethod(rich_to_multistring)

    def multistring_to_rich(self, mulstring):
        """Convert a multistring to a list of "rich" string trees:

           >>> target = multistring([u'foo', u'bar', u'baz'])
           >>> TranslationUnit(u'').multistring_to_rich(target)
           [<StringElem([<StringElem([u'foo'])>])>,
            <StringElem([<StringElem([u'bar'])>])>,
            <StringElem([<StringElem([u'baz'])>])>]

        A value that is not a C{multistring} is parsed as a single string and
        returned in a one-element list.
        """
        if isinstance(mulstring, multistring):
            return [rich_parse(s, self.rich_parsers) for s in mulstring.strings]
        return [rich_parse(mulstring, self.rich_parsers)]

    def setsource(self, source):
        """Sets the source string to the given value."""
        # The cached rich tree no longer matches, so drop it.
        self._rich_source = None
        self._source = source
    source = property(lambda self: self._source, setsource)

    def settarget(self, target):
        """Sets the target string to the given value."""
        # The cached rich tree no longer matches, so drop it.
        self._rich_target = None
        self._target = target
    target = property(lambda self: self._target, settarget)

    def _check_rich_value(value):
        """Validates a value that is about to be used as a rich string list.

        @raise ValueError: If C{value} is not iterable, is empty, or its
            first element is not a L{StringElem}.
        """
        if not hasattr(value, '__iter__'):
            raise ValueError('value must be iterable')
        if len(value) < 1:
            raise ValueError('value must have at least one element.')
        if not isinstance(value[0], StringElem):
            raise ValueError('value[0] must be of type StringElem.')
    _check_rich_value = staticmethod(_check_rich_value)

    def _get_rich_source(self):
        if self._rich_source is None:
            self._rich_source = self.multistring_to_rich(self.source)
        return self._rich_source

    def _set_rich_source(self, value):
        self._check_rich_value(value)
        self._rich_source = list(value)
        multi = self.rich_to_multistring(value)
        if self.source != multi:
            # NOTE(review): setsource() in this class resets _rich_source, so
            # the list cached just above is discarded on this path.
            self.source = multi
    rich_source = property(_get_rich_source, _set_rich_source)
    """ @see: rich_to_multistring
        @see: multistring_to_rich"""

    def _get_rich_target(self):
        if self._rich_target is None:
            self._rich_target = self.multistring_to_rich(self.target)
        return self._rich_target

    def _set_rich_target(self, value):
        self._check_rich_value(value)
        self._rich_target = list(value)
        # NOTE(review): settarget() in this class resets _rich_target, so the
        # list cached just above is discarded by the next assignment.
        self.target = self.rich_to_multistring(value)
    rich_target = property(_get_rich_target, _set_rich_target)
    """ @see: rich_to_multistring
        @see: multistring_to_rich"""

    def gettargetlen(self):
        """Returns the length of the target string.

        @note: Plural forms might be combined.
        @rtype: Integer
        """
        length = len(self.target or "")
        strings = getattr(self.target, "strings", [])
        if strings:
            # Add every plural form after the first one.
            length += sum([len(pluralform) for pluralform in strings[1:]])
        return length

    def getid(self):
        """A unique identifier for this unit.

        @rtype: string
        @return: an identifier for this unit that is unique in the store

        Derived classes should override this in a way that guarantees a unique
        identifier for each unit in the store.
        """
        return self.source

    def setid(self, value):
        """Sets the unique identifier for this unit.

        Only implemented if the format allows ids independent from other
        unit properties like source or context."""
        pass

    def getlocations(self):
        """A list of source code locations.

        @note: Shouldn't be implemented if the format doesn't support it.
        @rtype: List
        """
        return []

    def addlocation(self, location):
        """Add one location to the list of locations.

        @note: Shouldn't be implemented if the format doesn't support it.
        """
        pass

    def addlocations(self, location):
        """Add a location or a list of locations.

        @note: Most classes shouldn't need to implement this,
               but should rather implement L{addlocation()}.
        @warning: This method might be removed in future.
        """
        if isinstance(location, list):
            for item in location:
                self.addlocation(item)
        else:
            self.addlocation(location)

    def getcontext(self):
        """Get the message context."""
        return ""

    def setcontext(self, context):
        """Set the message context"""
        pass

    def getnotes(self, origin=None):
        """Returns all notes about this unit.

        It will probably be freeform text or something reasonable that can be
        synthesised by the format.
        It should not include location comments (see L{getlocations()}).
        """
        return getattr(self, "notes", "")

    def addnote(self, text, origin=None, position="append"):
        """Adds a note (comment).

        @type text: string
        @param text: Usually just a sentence or two.
        @type origin: string
        @param origin: Specifies who/where the comment comes from.
                       Origin can be one of the following text strings:
                         - 'translator'
                         - 'developer', 'programmer', 'source code' (synonyms)
        @type position: string
        @param position: With "append" the text is added on a new line after
                         any existing notes; any other value replaces the
                         existing notes with the text.
        """
        if position == "append" and getattr(self, "notes", None):
            self.notes += '\n' + text
        else:
            self.notes = text

    def removenotes(self):
        """Remove all the translator's notes."""
        self.notes = u''

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit.

        @type errorname: string
        @param errorname: A single word to id the error.
        @type errortext: string
        @param errortext: The text describing the error.
        """
        pass

    def geterrors(self):
        """Get all error messages.

        @rtype: Dictionary
        """
        return {}

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review.

        @keyword needsreview: Defaults to True.
        @keyword explanation: Adds an optional explanation as a note.
        """
        pass

    def istranslated(self):
        """Indicates whether this unit is translated.

        This should be used rather than deducing it from .target,
        to ensure that other classes can implement more functionality
        (as XLIFF does).
        """
        return bool(self.target) and not self.isfuzzy()

    def istranslatable(self):
        """Indicates whether this unit can be translated.

        This should be used to distinguish real units for translation from
        header, obsolete, binary or other blank units.
        """
        return True

    def isfuzzy(self):
        """Indicates whether this unit is fuzzy."""
        return False

    def markfuzzy(self, value=True):
        """Marks the unit as fuzzy or not."""
        pass

    def isobsolete(self):
        """Indicates whether a unit is obsolete."""
        return False

    def makeobsolete(self):
        """Make a unit obsolete"""
        pass

    def isheader(self):
        """Indicates whether this unit is a header."""
        return False

    def isreview(self):
        """Indicates whether this unit needs review."""
        return False

    def isblank(self):
        """Used to see if this unit has no source or target string.

        @note: This is probably used more to find translatable units,
        and we might want to move in that direction rather and get rid of this.
        """
        return not (self.source or self.target)

    def hasplural(self):
        """Tells whether or not this specific unit has plural strings."""
        #TODO: Reconsider
        return False

    def getsourcelanguage(self):
        """Returns the C{sourcelanguage} of the owning store, or "en" if the
        store has no such attribute (which includes having no store)."""
        return getattr(self._store, "sourcelanguage", "en")

    def gettargetlanguage(self):
        """Returns the C{targetlanguage} of the owning store, or None if the
        store has no such attribute (which includes having no store)."""
        return getattr(self._store, "targetlanguage", None)

    def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
        """Do basic format agnostic merging.

        The target of C{otherunit} is taken over when this unit has no target
        yet or when C{overwrite} is set. The C{comments} and C{authoritative}
        arguments are not used by this base implementation.
        """
        if not self.target or overwrite:
            self.rich_target = otherunit.rich_target

    def unit_iter(self):
        """Iterator that only returns this unit."""
        yield self

    def getunits(self):
        """This unit in a list."""
        return [self]

    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        # A unit of exactly this class that knows how to copy itself is
        # simply copied.
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        newunit = cls(unit.source)
        newunit.target = unit.target
        newunit.markfuzzy(unit.isfuzzy())
        locations = unit.getlocations()
        if locations:
            newunit.addlocations(locations)
        notes = unit.getnotes()
        if notes:
            newunit.addnote(notes)
        return newunit
    buildfromunit = classmethod(buildfromunit)

    # In this base class both properties always read as None and ignore
    # whatever is assigned to them.
    xid = property(lambda self: None, lambda self, value: None)
    rid = property(lambda self: None, lambda self, value: None)
406 407
class TranslationStore(object):
    """Base class for stores for multiple translation units of type UnitClass."""

    UnitClass = TranslationUnit
    """The class of units that will be instantiated and used by this class"""
    Name = "Base translation store"
    """The human usable name of this store type"""
    Mimetypes = None
    """A list of MIME types associated with this store type"""
    Extensions = None
    """A list of file extensions associated with this store type"""
    _binary = False
    """Indicates whether a file should be accessed as a binary file."""
    suggestions_in_format = False
    """Indicates if format can store suggestions and alternative translation for a unit"""

    def __init__(self, unitclass=None):
        """Constructs a blank TranslationStore.

        @param unitclass: Optional unit class to use for this instance instead
            of the class level C{UnitClass}.
        """
        self.units = []
        self.sourcelanguage = None
        self.targetlanguage = None
        if unitclass:
            self.UnitClass = unitclass
        super(TranslationStore, self).__init__()

    def getsourcelanguage(self):
        """Gets the source language for this store"""
        return self.sourcelanguage

    def setsourcelanguage(self, sourcelanguage):
        """Sets the source language for this store"""
        self.sourcelanguage = sourcelanguage

    def gettargetlanguage(self):
        """Gets the target language for this store"""
        return self.targetlanguage

    def settargetlanguage(self, targetlanguage):
        """Sets the target language for this store"""
        self.targetlanguage = targetlanguage

    def unit_iter(self):
        """Iterator over all the units in this store."""
        for unit in self.units:
            yield unit

    def getunits(self):
        """Return a list of all units in this store."""
        return [unit for unit in self.unit_iter()]

    def addunit(self, unit):
        """Appends the given unit to the object's list of units.

        This method should always be used rather than trying to modify the
        list manually.

        @type unit: L{TranslationUnit}
        @param unit: The unit that will be added.
        """
        unit._store = self
        self.units.append(unit)

    def addsourceunit(self, source):
        """Adds and returns a new unit with the given source string.

        @rtype: L{TranslationUnit}
        """
        unit = self.UnitClass(source)
        self.addunit(unit)
        return unit

    def findid(self, id):
        """Find the unit with a matching id by checking id_index.

        The index is built first if it does not exist yet.

        @rtype: L{TranslationUnit} or None
        """
        self.require_index()
        return self.id_index.get(id, None)

    def findunit(self, source):
        """Finds the unit with the given source string.

        A non-empty C{sourceindex} is used when there is one; otherwise the
        units are scanned in order.

        @rtype: L{TranslationUnit} or None
        """
        if len(getattr(self, "sourceindex", [])):
            if source in self.sourceindex:
                return self.sourceindex[source][0]
        else:
            for unit in self.units:
                if unit.source == source:
                    return unit
        return None

    def findunits(self, source):
        """Finds the units with the given source string.

        @rtype: List of L{TranslationUnit}s, or None
        @return: With a non-empty C{sourceindex}: the indexed list of units,
            or None if the source is not indexed. Without one: a (possibly
            empty) list of the units whose source matches.
        """
        if len(getattr(self, "sourceindex", [])):
            if source in self.sourceindex:
                return self.sourceindex[source]
        else:
            #FIXME: maybe we should generate index here instead since
            #we'll scan all units anyway
            result = []
            for unit in self.units:
                if unit.source == source:
                    result.append(unit)
            return result
        return None

    def translate(self, source):
        """Returns the translated string for a given source string.

        @rtype: String or None
        """
        unit = self.findunit(source)
        if unit and unit.target:
            return unit.target
        else:
            return None

    def remove_unit_from_index(self, unit):
        """Remove a unit from the source, location and id indexes."""

        def remove_unit(source):
            if source in self.sourceindex:
                try:
                    self.sourceindex[source].remove(unit)
                    if len(self.sourceindex[source]) == 0:
                        del(self.sourceindex[source])
                except ValueError:
                    # The unit was not listed under this source.
                    pass

        if unit.hasplural():
            for source in unit.source.strings:
                remove_unit(source)
        else:
            remove_unit(unit.source)

        for location in unit.getlocations():
            if location in self.locationindex and self.locationindex[location] is not None \
                    and self.locationindex[location] == unit:
                del(self.locationindex[location])

        # add_unit_to_index() also registers the unit by id, so drop that
        # entry too, but only if it still refers to this very unit.
        id_index = getattr(self, "id_index", None)
        if id_index is not None:
            unitid = unit.getid()
            if id_index.get(unitid, None) is unit:
                del id_index[unitid]

    def add_unit_to_index(self, unit):
        """Add a unit to the source, location and id indexes."""
        self.id_index[unit.getid()] = unit

        def insert_unit(source):
            if source not in self.sourceindex:
                self.sourceindex[source] = [unit]
            else:
                self.sourceindex[source].append(unit)

        if unit.hasplural():
            for source in unit.source.strings:
                insert_unit(source)
        else:
            insert_unit(unit.source)

        for location in unit.getlocations():
            if location in self.locationindex:
                # if sources aren't unique, don't use them
                #FIXME: maybe better store a list of units like sourceindex
                self.locationindex[location] = None
            else:
                self.locationindex[location] = unit

    def makeindex(self):
        """Indexes the items in this store. At least .sourceindex should be useful."""
        self.locationindex = {}
        self.sourceindex = {}
        self.id_index = {}
        for index, unit in enumerate(self.units):
            unit.index = index
            if unit.istranslatable():
                self.add_unit_to_index(unit)

    def require_index(self):
        """Make sure the indexes exist (checked by way of C{id_index})."""
        if not hasattr(self, "id_index"):
            self.makeindex()

    def getids(self, filename=None):
        """Return a list of unit ids.

        The C{filename} argument is not used by this base implementation.
        """
        self.require_index()
        return self.id_index.keys()

    def __getstate__(self):
        """Returns a copy of the instance state with C{fileobj} set to None."""
        odict = self.__dict__.copy()
        odict['fileobj'] = None
        return odict

    def __setstate__(self, dict):
        """Restores the instance state and reopens C{filename} if one is set."""
        self.__dict__.update(dict)
        if getattr(self, "filename", False):
            self.fileobj = open(self.filename)

    def __str__(self):
        """Converts to a string representation that can be parsed back using L{parsestring()}."""
        # We can't pickle fileobj if it is there, so let's hide it for a while.
        fileobj = getattr(self, "fileobj", None)
        self.fileobj = None
        try:
            return pickle.dumps(self)
        finally:
            # Put the file object back even when pickling fails.
            self.fileobj = fileobj

    def isempty(self):
        """Returns True if the object doesn't contain any translation units."""
        if len(self.units) == 0:
            return True
        for unit in self.units:
            if unit.istranslatable():
                return False
        return True

    def _assignname(self):
        """Tries to work out what the name of the filesystem file is and
        assigns it to .filename."""
        fileobj = getattr(self, "fileobj", None)
        if fileobj:
            filename = getattr(fileobj, "name", getattr(fileobj, "filename", None))
            if filename:
                self.filename = filename

    def parsestring(cls, storestring):
        """Converts the string representation back to an object."""
        newstore = cls()
        if storestring:
            newstore.parse(storestring)
        return newstore
    parsestring = classmethod(parsestring)

    def parse(self, data):
        """Parser to process the given source string.

        @warning: This base implementation unpickles C{data}. Unpickling can
            execute arbitrary code, so it must never be fed untrusted input.
        """
        # SECURITY: pickle.loads() is only safe on data from a trusted source.
        self.units = pickle.loads(data).units

    def savefile(self, storefile):
        """Writes the string representation to the given file (or filename).

        The file is closed afterwards, also when serialising or writing fails.
        """
        if isinstance(storefile, basestring):
            mode = 'w'
            if self._binary:
                mode = 'wb'
            storefile = open(storefile, mode)
        self.fileobj = storefile
        self._assignname()
        try:
            storestring = str(self)
            storefile.write(storestring)
        finally:
            storefile.close()

    def save(self):
        """Save to the file that data was originally read from, if available.

        @raise ValueError: If there is neither a usable file object nor a
            filename to save to.
        """
        fileobj = getattr(self, "fileobj", None)
        mode = 'w'
        if self._binary:
            mode = 'wb'
        if not fileobj:
            filename = getattr(self, "filename", None)
            if not filename:
                # Without this check None would be handed to savefile().
                raise ValueError("No file or filename to save to")
            fileobj = open(filename, mode)
        else:
            fileobj.close()
            filename = getattr(fileobj, "name", getattr(fileobj, "filename", None))
            if not filename:
                raise ValueError("No file or filename to save to")
            # Reopen for writing with the same file class as before.
            fileobj = fileobj.__class__(filename, mode)
        self.savefile(fileobj)

    def parsefile(cls, storefile):
        """Reads the given file (or opens the given filename) and parses back to an object."""
        mode = 'r'
        if cls._binary:
            mode = 'rb'
        if isinstance(storefile, basestring):
            storefile = open(storefile, mode)
        mode = getattr(storefile, "mode", mode)
        #For some reason GzipFile returns 1, so we have to test for that here
        if mode == 1 or "r" in mode:
            try:
                storestring = storefile.read()
            finally:
                # Close the file even when reading fails.
                storefile.close()
        else:
            storestring = ""
        newstore = cls.parsestring(storestring)
        newstore.fileobj = storefile
        newstore._assignname()
        return newstore
    parsefile = classmethod(parsefile)