Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  # The import of xliff fail silently in the absence of lxml if another module 
  47  # already tried to import it unsuccessfully, so let's make 100% sure: 
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  55  # this should capture printf types defined in other platforms. 
  56  # extended to support Python named format specifiers 
  57  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  58   
  59  # The name of the XML tag 
  60  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  61   
  62  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  63  #TODO: remove escaped strings once usage is audited 
  64  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  65   
  66  # The whole tag 
  67  tag_re = re.compile("<[^>]+>") 
  68   
  69  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  70   
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    The name is the first capture group of tagname_re, matched at the start
    of string.  A string that tagname_re does not match raises
    AttributeError, because no match object is returned."""
    found = tagname_re.match(string)
    return found.group(1)
74
def intuplelist(pair, list):
    """Looks up the triple pair == (a, b, c) in list, treating None entries
    of the list patterns as wildcards (only in positions "a" and "c").

    "c" is only considered once "b" has matched.  Returns the first matching
    pattern from list; returns pair itself when nothing matches or when pair
    is a bare tag name, i.e. (a, None, None)."""
    tag, attribute, value = pair
    if attribute is None and value is None:
        # This is a tagname, nothing to look up
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        if cand_attribute != attribute:
            continue
        if cand_tag != tag and cand_tag is not None:
            continue
        if cand_value is None or cand_value == value:
            return candidate
    return pair
89
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue) tuples, leaving out the
    combinations specified in ignore.

    Every tag contributes a (tagname, None, None) entry first, followed by
    its attributes as found by property_re."""
    collected = []
    for string in strings:
        tag = tagname(string)
        collected.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for attribute, quoted, _double, _single in property_re.findall(string):
            # Strip the quotes:
            candidate = (tag, attribute, quoted[1:-1])
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # NOTE(review): this stops at the first ignorable attribute,
                # so the remaining attributes of the same tag are dropped as
                # well - confirm whether skipping just this one was intended.
                break
            collected.append(candidate)
    return collected
112 113
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an explanation
    or a comment"""

    def __init__(self, messages):
        # A single message and a list of messages are both accepted.
        if isinstance(messages, list):
            parts = messages
        else:
            parts = [messages]
        assert isinstance(parts[0], unicode) # Assumption: all of same type
        text = u", ".join(parts)
        Exception.__init__(self, text)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            self.args = text
126
class SeriousFilterFailure(FilterFailure):
    """This exception signals that a Filter didn't pass, and the bad translation
    might break an application (so the string will be marked fuzzy)"""
# (tag, attribute, value) specifies a certain attribute which can be changed/
# ignored if it exists inside tag. In the case where there is a third element
# in the tuple, it indicates a property value that can be ignored if present
# (like defaults, for example).
# If a certain item is None, it indicates that it is relevant for all values of
# the property/tag that is specified as None. A non-None value of "value"
# indicates that the value of the attribute must be taken into account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None), (None, "title", None)]
# Actually the title attribute is allowed on many tags in HTML (but probably
# not all)
class CheckerConfig(object):
    """object representing the configuration of a checker"""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        # List-valued settings: None becomes a fresh empty list
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Language data; self.lang must exist before the defaults below
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Settings that fall back to a default when not supplied
        user_punctuation = data.normalized_unicode(punctuation)
        self.punctuation = self._init_default(user_punctuation, self.lang.punctuation)
        user_endpunctuation = data.normalized_unicode(endpunctuation)
        self.endpunctuation = self._init_default(user_endpunctuation, self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Word lists are kept as dictionaries keyed on the normalized word
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = {}
        for word in self._init_list(notranslatewords):
            self.notranslatewords[data.normalized_unicode(word)] = None
        self.musttranslatewords = {}
        for word in self._init_list(musttranslatewords):
            self.musttranslatewords[data.normalized_unicode(word)] = None
        self.validcharsmap = {}
        self.updatevalidchars(data.normalized_unicode(validchars))

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        """
        if list is not None:
            return list
        return []

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user, or the default
        """
        if param is not None:
            return param
        return default

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object"""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        unseen_markers = [marker for marker in otherconfig.accelmarkers
                          if marker not in self.accelmarkers]
        self.accelmarkers.extend(unseen_markers)
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these settings are replaced outright rather than merged)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters"""
        if validchars is None:
            return True
        # Each valid character maps its code point to None
        codepoints = [ord(validchar) for validchar in data.normalized_unicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(codepoints))

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target language"""
        self.lang = factory.getlanguage(langcode)
220
def cache_results(f):
    """Wraps the one-argument method f so that its results are memoised in
    the instance's results_cache dictionary.

    The cache key is (f.__name__, param1), so param1 must be hashable and the
    instance must provide a results_cache dict before the wrapper is called.

    @param f: a method taking (self, param1)
    @return: the caching wrapper, carrying the docstring of f
    """
    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key in res_cache:
            return res_cache[key]
        value = f(self, param1)
        res_cache[key] = value
        return value
    # Keep the wrapped method's documentation visible on the wrapper; it is
    # copied by hand so no newer library helper is needed.
    cached_f.__doc__ = f.__doc__
    return cached_f
class UnitChecker(object):
    """Parent Checker class which does the checking based on functions available
    in derived classes.

    Every callable attribute that is not defined on UnitChecker itself (and is
    not excluded) is treated as a test.  A test's docstring is used as the
    failure message unless the test raises a FilterFailure."""
    # Maps a test name to the names of the tests that are skipped once that
    # test has failed (see run_filters).
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """Sets up the configuration and collects the available tests.

        @param checkerconfig: a CheckerConfig; a default one is created if None
        @param excludefilters: names of tests that must not be run
        @param limitfilters: if given, only these test names are considered
        @param errorhandler: optional callable invoked as
            errorhandler(functionname, source, target, exception) when a test
            raises an unexpected exception; its result is used as the test result
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            self.setconfig(CheckerConfig())
        else:
            self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests...
        self.helperfunctions = {}
        for functionname in dir(UnitChecker):
            function = getattr(self, functionname)
            if callable(function):
                self.helperfunctions[functionname] = function
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)

        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        filters = {}
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        for functionname in limitfilters:
            if functionname in excludefilters: continue
            if functionname in self.helperfunctions: continue
            # the error handler is a callable attribute, but not a test
            if functionname == "errorhandler": continue
            filterfunction = getattr(self, functionname, None)
            if not callable(filterfunction): continue
            filters[functionname] = filterfunction
        return filters

    def setconfig(self, config):
        """sets the configuration and builds the accelerator and variable
        prefilters from its accelmarkers and varmatches"""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
        self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
                for startmatch, endmatch in self.config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone)
                for startmatch, endmatch in self.config.varmatches]

    def setsuggestionstore(self, store):
        """Sets the store that a checker should use for evaluating suggestions."""
        self.suggestion_store = store
        if self.suggestion_store:
            self.suggestion_store.require_index()

    def filtervariables(self, str1):
        """filter out variables from str1"""
        return helpers.multifilter(str1, self.varfilters)
    filtervariables = cache_results(filtervariables)

    def removevariables(self, str1):
        """remove variables from str1"""
        return helpers.multifilter(str1, self.removevarfilter)
    removevariables = cache_results(removevariables)

    def filteraccelerators(self, str1):
        """filter out accelerators from str1"""
        return helpers.multifilter(str1, self.accfilters, None)
    filteraccelerators = cache_results(filteraccelerators)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """filter out accelerators from str1, passing acceptlist on to the
        accelerator filters (this variant is not cached)"""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    def filterwordswithpunctuation(self, str1):
        """replaces words with punctuation with their unpunctuated equivalents"""
        return prefilters.filterwordswithpunctuation(str1)
    filterwordswithpunctuation = cache_results(filterwordswithpunctuation)

    def filterxml(self, str1):
        """filter out XML from the string so only text remains"""
        return tag_re.sub("", str1)
    filterxml = cache_results(filterxml)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal operation"""
        return test(unit)

    def run_filters(self, unit):
        """run all the tests in this suite, return failures as testname, message_or_exception"""
        # the cache only holds results for the unit currently being checked
        self.results_cache = {}
        failures = {}
        ignores = self.config.lang.ignoretests[:]
        functionnames = self.defaultfilters.keys()
        # precondition tests run first so that they can disable dependent tests
        priorityfunctionnames = self.preconditions.keys()
        otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
        for functionname in priorityfunctionnames + otherfunctionnames:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if using TeeChecker
            if filterfunction is None:
                continue
            # default message: the test's docstring
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure, e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception, e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" % \
                            (functionname, unit.source, unit.target, e))
                else:
                    filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
            if not filterresult:
                # we test some preconditions that aren't actually a cause for failure
                if functionname in self.defaultfilters:
                    failures[functionname] = filtermessage
                if functionname in self.preconditions:
                    for ignoredfunctionname in self.preconditions[functionname]:
                        ignores.append(ignoredfunctionname)
        self.results_cache = {}
        return failures
358
359 -class TranslationChecker(UnitChecker):
360 """A checker that passes source and target strings to the checks, not the 361 whole unit. 362 363 This provides some speedup and simplifies testing."""
364 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
365 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
366
367 - def run_test(self, test, unit):
368 """Runs the given test on the given unit. 369 370 Note that this can raise a FilterFailure as part of normal operation.""" 371 if self.hasplural: 372 filtermessages = [] 373 filterresult = True 374 for pluralform in unit.target.strings: 375 try: 376 if not test(self.str1, unicode(pluralform)): 377 filterresult = False 378 except FilterFailure, e: 379 filterresult = False 380 filtermessages.append( unicode(e.args) ) 381 if not filterresult and filtermessages: 382 raise FilterFailure(filtermessages) 383 else: 384 return filterresult 385 else: 386 return test(self.str1, self.str2)
387
388 - def run_filters(self, unit):
389 """Do some optimisation by caching some data of the unit for the benefit 390 of run_test().""" 391 self.str1 = data.normalized_unicode(unit.source) or u"" 392 self.str2 = data.normalized_unicode(unit.target) or u"" 393 self.hasplural = unit.hasplural() 394 self.locations = unit.getlocations() 395 return super(TranslationChecker, self).run_filters(unit)
396
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """construct a TeeChecker from the given checkers"""
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        # Every checker class is instantiated with the same settings
        self.checkers = []
        for checkerclass in checkerclasses:
            checker = checkerclass(checkerconfig=checkerconfig,
                                   excludefilters=excludefilters,
                                   limitfilters=limitfilters,
                                   errorhandler=errorhandler)
            self.checkers.append(checker)
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        if excludefilters is None:
            excludefilters = {}
        # Query every checker first, then merge; later checkers win on clashes
        per_checker = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
        merged = {}
        for filters in per_checker:
            merged.update(filters)
        self.combinedfilters = merged
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in merged:
                    continue
                import sys
                sys.stderr.write("warning: could not find filter %s" % filtername + "\n")
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites"""
        combined = {}
        for checker in self.checkers:
            combined.update(checker.run_filters(unit))
        return combined

    def setsuggestionstore(self, store):
        """Sets the filename that a checker should use for evaluating suggestions."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
445 446
447 -class StandardChecker(TranslationChecker):
448 """The basic test suite for source -> target translations."""
449 - def untranslated(self, str1, str2):
450 """checks whether a string has been translated at all""" 451 str2 = prefilters.removekdecomments(str2) 452 return not (len(str1.strip()) > 0 and len(str2) == 0)
453
454 - def unchanged(self, str1, str2):
455 """checks whether a translation is basically identical to the original string""" 456 str1 = self.filteraccelerators(self.removevariables(str1)).strip() 457 str2 = self.filteraccelerators(self.removevariables(str2)).strip() 458 if len(str1) < 2: 459 return True 460 # If the whole string is upperase, or nothing in the string can go 461 # towards uppercase, let's assume there is nothing translatable 462 # TODO: reconsider 463 if (str1.isupper() or str1.upper() == str1) and str1 == str2: 464 return True 465 if self.config.notranslatewords: 466 words1 = str1.split() 467 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]: 468 #currently equivalent to: 469 # if len(words1) == 1 and words1[0] in self.config.notranslatewords: 470 #why do we only test for one notranslate word? 471 return True 472 # we could also check for things like str1.isnumeric(), but the test 473 # above (str1.upper() == str1) makes this unnecessary 474 if str1.lower() == str2.lower(): 475 raise FilterFailure(u"please translate") 476 return True
477
478 - def blank(self, str1, str2):
479 """checks whether a translation only contains spaces""" 480 len1 = len(str1.strip()) 481 len2 = len(str2.strip()) 482 return not (len1 > 0 and len(str2) != 0 and len2 == 0)
483
484 - def short(self, str1, str2):
485 """checks whether a translation is much shorter than the original string""" 486 len1 = len(str1.strip()) 487 len2 = len(str2.strip()) 488 return not ((len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)))
489
490 - def long(self, str1, str2):
491 """checks whether a translation is much longer than the original string""" 492 len1 = len(str1.strip()) 493 len2 = len(str2.strip()) 494 return not ((len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)))
495
496 - def escapes(self, str1, str2):
497 """checks whether escaping is consistent between the two strings""" 498 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")): 499 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word]) 500 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word]) 501 raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2)) 502 else: 503 return True
504
505 - def newlines(self, str1, str2):
506 """checks whether newlines are consistent between the two strings""" 507 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")): 508 raise FilterFailure(u"line endings in original don't match line endings in translation") 509 else: 510 return True
511
512 - def tabs(self, str1, str2):
513 """checks whether tabs are consistent between the two strings""" 514 if not helpers.countmatch(str1, str2, "\t"): 515 raise SeriousFilterFailure(u"tabs in original don't match tabs in translation") 516 else: 517 return True
518
519 - def singlequoting(self, str1, str2):
520 """checks whether singlequoting is consistent between the two strings""" 521 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))) 522 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))) 523 return helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'"))
524
525 - def doublequoting(self, str1, str2):
526 """checks whether doublequoting is consistent between the two strings""" 527 str1 = self.filteraccelerators(self.filtervariables(str1)) 528 str1 = self.filterxml(str1) 529 str1 = self.config.lang.punctranslate(str1) 530 str2 = self.filteraccelerators(self.filtervariables(str2)) 531 str2 = self.filterxml(str2) 532 return helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”"))
533
534 - def doublespacing(self, str1, str2):
535 """checks for bad double-spaces by comparing to original""" 536 str1 = self.filteraccelerators(str1) 537 str2 = self.filteraccelerators(str2) 538 return helpers.countmatch(str1, str2, u" ")
539
540 - def puncspacing(self, str1, str2):
541 """checks for bad spacing after punctuation""" 542 if str1.find(u" ") == -1: 543 return True 544 str1 = self.filteraccelerators(self.filtervariables(str1)) 545 str1 = self.config.lang.punctranslate(str1) 546 str2 = self.filteraccelerators(self.filtervariables(str2)) 547 for puncchar in self.config.punctuation: 548 plaincount1 = str1.count(puncchar) 549 plaincount2 = str2.count(puncchar) 550 if not plaincount1 or plaincount1 != plaincount2: 551 continue 552 spacecount1 = str1.count(puncchar + u" ") 553 spacecount2 = str2.count(puncchar + u" ") 554 if spacecount1 != spacecount2: 555 # handle extra spaces that are because of transposed punctuation 556 if str1.endswith(puncchar) != str2.endswith(puncchar) and abs(spacecount1-spacecount2) == 1: 557 continue 558 return False 559 return True
560
    def printf(self, str1, str2):
        """checks whether printf format strings match"""
        # Returns 1 when the format specifiers are compatible and 0 otherwise.
        # count1/count2 end up as the number of specifiers seen in str1/str2,
        # or stay None when that string was never scanned or had none.
        count1 = count2 = plural = None
        # self.hasplural only set by run_filters, not always available
        if 'hasplural' in self.__dict__:
            plural = self.hasplural
        for var_num2, match2 in enumerate(printf_pat.finditer(str2)):
            count2 = var_num2 + 1
            str2key = match2.group('key')
            if match2.group('ord'):
                # positional specifier (%1$s): compare with the specifier at
                # that position in the original
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    if int(match2.group('ord')) == var_num1 + 1:
                        if match2.group('fullvar') != match1.group('fullvar'):
                            return 0
            elif str2key:
                # named specifier (%(key)s): the same key must occur in the
                # original with the same format
                str1key = None
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    if match1.group('key') and str2key == match1.group('key'):
                        str1key = match1.group('key')
                        # '%.0s' "placeholder" in plural will match anything
                        if plural and match2.group('fullvar') == '.0s':
                            continue
                        if match1.group('fullvar') != match2.group('fullvar'):
                            return 0
                if str1key == None:
                    return 0
            else:
                # plain specifier: compare with the specifier at the same index
                for var_num1, match1 in enumerate(printf_pat.finditer(str1)):
                    count1 = var_num1 + 1
                    # '%.0s' "placeholder" in plural will match anything
                    if plural and match2.group('fullvar') == '.0s':
                        continue
                    if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')):
                        return 0

        # no specifiers in the translation: the original must have none either
        if count2 is None:
            if list(printf_pat.finditer(str1)):
                return 0

        if (count1 or count2) and (count1 != count2):
            return 0
        return 1
605
    def accelerators(self, str1, str2):
        """checks whether accelerators are consistent between the two strings"""
        # Raises FilterFailure (or SeriousFilterFailure when "accelerators" is
        # listed in config.criticaltests) with one message per accelerator
        # marker whose counts differ; returns True otherwise.
        str1 = self.filtervariables(str1)
        str2 = self.filtervariables(str2)
        messages = []
        for accelmarker in self.config.accelmarkers:
            # the original is counted with the source language's valid
            # accelerators, the translation with the target language's
            counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
            counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
            count1, countbad1 = counter1(str1)
            count2, countbad2 = counter2(str2)
            getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
            accel2, bad2 = getaccel(str2)
            if count1 == count2:
                continue
            if count1 == 1 and count2 == 0:
                if countbad2 == 1:
                    messages.append(u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0]))
                else:
                    messages.append(u"accelerator %s is missing from translation" % accelmarker)
            elif count1 == 0:
                messages.append(u"accelerator %s does not occur in original and should not be in translation" % accelmarker)
            elif count1 == 1 and count2 > count1:
                messages.append(u"accelerator %s is repeated in translation" % accelmarker)
            else:
                messages.append(u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2))
        if messages:
            if "accelerators" in self.config.criticaltests:
                raise SeriousFilterFailure(messages)
            else:
                raise FilterFailure(messages)
        return True
637 638 # def acceleratedvariables(self, str1, str2): 639 # """checks that no variables are accelerated""" 640 # messages = [] 641 # for accelerator in self.config.accelmarkers: 642 # for variablestart, variableend in self.config.varmatches: 643 # error = accelerator + variablestart 644 # if str1.find(error) >= 0: 645 # messages.append(u"original has an accelerated variable") 646 # if str2.find(error) >= 0: 647 # messages.append(u"translation has an accelerated variable") 648 # if messages: 649 # raise FilterFailure(messages) 650 # return True 651
    def variables(self, str1, str2):
        """checks whether variables of various forms are consistent between the two strings"""
        # Raises SeriousFilterFailure when variables of the original are
        # missing from the translation, FilterFailure when the translation
        # only has extra variables; returns True when everything matches.
        messages = []
        mismatch1, mismatch2 = [], []
        varnames1, varnames2 = [], []
        for startmarker, endmarker in self.config.varmatches:
            varchecker = decoration.getvariables(startmarker, endmarker)
            # redecorate puts the markers back around a bare variable name for
            # the message; an int endmarker is not text and is left out
            if startmarker and endmarker:
                if isinstance(endmarker, int):
                    redecorate = lambda var: startmarker + var
                else:
                    redecorate = lambda var: startmarker + var + endmarker
            elif startmarker:
                redecorate = lambda var: startmarker + var
            else:
                redecorate = lambda var: var
            vars1 = varchecker(str1)
            vars2 = varchecker(str2)
            if vars1 != vars2:
                # we use counts to compare so we can handle multiple variables
                vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], [var for var in vars2 if vars1.count(var) < vars2.count(var)]
                # filter variable names we've already seen, so they aren't matched by more than one filter...
                vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2]
                varnames1.extend(vars1)
                varnames2.extend(vars2)
                vars1 = map(redecorate, vars1)
                vars2 = map(redecorate, vars2)
                mismatch1.extend(vars1)
                mismatch2.extend(vars2)
        if mismatch1:
            messages.append(u"do not translate: %s" % u", ".join(mismatch1))
        elif mismatch2:
            messages.append(u"translation contains variables not in original: %s" % u", ".join(mismatch2))
        if messages and mismatch1:
            raise SeriousFilterFailure(messages)
        elif messages:
            raise FilterFailure(messages)
        return True
690
691 - def functions(self, str1, str2):
692 """checks that function names are not translated""" 693 return helpers.funcmatch(str1, str2, decoration.getfunctions, self.config.punctuation)
694
695 - def emails(self, str1, str2):
696 """checks that emails are not translated""" 697 return helpers.funcmatch(str1, str2, decoration.getemails)
698
699 - def urls(self, str1, str2):
700 """checks that URLs are not translated""" 701 return helpers.funcmatch(str1, str2, decoration.geturls)
702
703 - def numbers(self, str1, str2):
704 """checks whether numbers of various forms are consistent between the two strings""" 705 return helpers.countsmatch(str1, str2, decoration.getnumbers(str1))
706
707 - def startwhitespace(self, str1, str2):
708 """checks whether whitespace at the beginning of the strings matches""" 709 return helpers.funcmatch(str1, str2, decoration.spacestart)
710
711 - def endwhitespace(self, str1, str2):
712 """checks whether whitespace at the end of the strings matches""" 713 str1 = self.config.lang.punctranslate(str1) 714 return helpers.funcmatch(str1, str2, decoration.spaceend)
715
716 - def startpunc(self, str1, str2):
717 """checks whether punctuation at the beginning of the strings match""" 718 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 719 str1 = self.config.lang.punctranslate(str1) 720 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 721 return helpers.funcmatch(str1, str2, decoration.puncstart, self.config.punctuation)
722
723 - def endpunc(self, str1, str2):
724 """checks whether punctuation at the end of the strings match""" 725 str1 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str1))) 726 str1 = self.config.lang.punctranslate(str1) 727 str2 = self.filteraccelerators(self.filtervariables(self.filterwordswithpunctuation(str2))) 728 str1 = str1.rstrip() 729 str2 = str2.rstrip() 730 return helpers.funcmatch(str1, str2, decoration.puncend, self.config.endpunctuation + u":")
731
732 - def purepunc(self, str1, str2):
733 """checks that strings that are purely punctuation are not changed""" 734 # this test is a subset of startandend 735 if (decoration.ispurepunctuation(str1)): 736 return str1 == str2 737 else: 738 return not decoration.ispurepunctuation(str2)
739
740 - def brackets(self, str1, str2):
741 """checks that the number of brackets in both strings match""" 742 str1 = self.filtervariables(str1) 743 str2 = self.filtervariables(str2) 744 messages = [] 745 missing = [] 746 extra = [] 747 for bracket in (u"[", u"]", u"{", u"}", u"(", u")"): 748 count1 = str1.count(bracket) 749 count2 = str2.count(bracket) 750 if count2 < count1: 751 missing.append(u"'%s'" % bracket) 752 elif count2 > count1: 753 extra.append(u"'%s'" % bracket) 754 if missing: 755 messages.append(u"translation is missing %s" % u", ".join(missing)) 756 if extra: 757 messages.append(u"translation has extra %s" % u", ".join(extra)) 758 if messages: 759 raise FilterFailure(messages) 760 return True
761
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match

    Sentences are counted with the source language's C{sentences} for the
    source and the target language's C{sentences} for the translation, after
    both strings have gone through C{filteraccelerators}.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when both counts are equal
    @raise FilterFailure: reporting both counts when they differ
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    sourcecount = len(self.config.sourcelang.sentences(source))
    targetcount = len(self.config.lang.sentences(target))
    if sourcecount != targetcount:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (sourcecount, targetcount))
    return True
771
def options(self, str1, str2):
    """checks that options are not translated

    An option is a whitespace-delimited source word, other than a bare
    C{--}, that starts with C{--} and ends in an alphanumeric character.
    The part before the first C{=} must occur in the translation, and a
    parameter following that C{=} must not.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when no option violates these rules
    @raise FilterFailure: naming the offending option or parameter
    """
    for word in self.filtervariables(str1).split():
        if word == u"--" or not word.startswith(u"--") or not word[-1].isalnum():
            continue
        parts = word.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": option})
    return True
783
def startcaps(self, str1, str2):
    """checks that the message starts with the correct capitalisation

    When both strings (after C{filteraccelerators}) are longer than one
    character, the source language's C{capsstart} of the source is compared
    with the target language's C{capsstart} of the translation.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: the C{capsstart} comparison for longer strings; otherwise False
        only when exactly one of the strings is empty
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if len(source) > 1 and len(target) > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    # At least one string is too short to judge: this only counts as a
    # failure when one side is empty and the other is not.
    return bool(source) == bool(target)
795
def simplecaps(self, str1, str2):
    """checks the capitalisation of two strings isn't wildly different

    Upper-case and alphabetic characters are counted in both strings (after
    C{removevariables}) and compared with a series of heuristics.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when the capitalisation is considered similar enough
    """
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably serves
    # no purpose to get sourcelang.sentenceend
    source = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", source)
    sourcecaps = helpers.filtercount(source, unicode.isupper)
    targetcaps = helpers.filtercount(target, unicode.isupper)
    sourcealpha = helpers.filtercount(source, unicode.isalpha)
    targetalpha = helpers.filtercount(target, unicode.isalpha)
    sourcelen = len(source)
    targetlen = len(target)
    # Capture the all caps case
    if sourcecaps == sourcealpha:
        return targetcaps == targetalpha
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same
    if sourcecaps in (0, 1):
        return targetcaps == sourcecaps
    if sourcecaps < sourcelen / 10:
        return targetcaps <= targetlen / 8
    if sourcelen < 10:
        return abs(sourcecaps - targetcaps) < 3
    if sourcecaps > sourcelen * 6 / 10:
        return targetcaps > targetlen * 6 / 10
    return abs(sourcecaps - targetcaps) < (sourcelen + targetlen) / 6
821
def acronyms(self, str1, str2):
    """checks that acronyms that appear are unchanged

    An acronym is a source word (as produced by the target language's
    C{word_iter}) that is all upper case and longer than one character.
    Variables found in the source via the configured C{varmatches} and the
    keys of C{musttranslatewords} are exempt.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when every acronym also occurs in the translation
    @raise FilterFailure: listing the acronyms absent from the translation
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed += decoration.getvariables(startmatch, endmatch)(str1)
    allowed += self.config.musttranslatewords.keys()
    source = self.filteraccelerators(self.filtervariables(str1))
    sourcewords = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messenger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in sourcewords
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
843
def doublewords(self, str1, str2):
    """checks for repeated words in the translation

    The translation is passed through C{removevariables} and
    C{filteraccelerators}, has its full stops removed, and is lower-cased
    and split on whitespace.  Consecutive identical words are reported
    unless the word is listed in the target language's C{validdoublewords}.

    @param str1: the source string (not used by this check)
    @param str2: the target (translated) string
    @return: True when no word is repeated
    @raise FilterFailure: naming the first repeated word
    """
    # The split() below already breaks on every kind of whitespace,
    # newlines included, so no separate newline handling is needed.
    text = self.filteraccelerators(self.removevariables(str2))
    words = text.replace(u".", u"").lower().split()
    lastword = ""
    for word in words:
        if word == lastword and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        lastword = word
    return True
854
def notranslatewords(self, str1, str2):
    """checks that words configured as untranslatable appear in the translation too

    Both strings go through C{filtervariables}, have every configured
    punctuation character replaced by a space, go through
    C{filteraccelerators} and are split on whitespace.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when nothing is configured, or when every configured word
        found in the source is also a word of the translation
    @raise FilterFailure: listing the words that should not be translated
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # The punctuation list is full of strange quotes and things in utf-8
    # encoding; the single apostrophe is perhaps problematic in words like
    # "doesn't".
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    sourcewords = self.filteraccelerators(source).split()
    targetwords = self.filteraccelerators(target).split()
    translated = []
    for word in sourcewords:
        if word in self.config.notranslatewords and word not in targetwords:
            translated.append(word)
    if translated:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(translated)))
    return True
872
def musttranslatewords(self, str1, str2):
    """checks that words configured as definitely translatable don't appear in
    the translation

    Both strings go through C{removevariables}, have every configured
    punctuation character replaced by a space, go through
    C{filteraccelerators} and are split on whitespace.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when nothing is configured, or when no configured word
        found in the source is also a word of the translation
    @raise FilterFailure: listing the words that should have been translated
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The punctuation list is full of strange quotes and things in utf-8
    # encoding; the single apostrophe is perhaps problematic in words like
    # "doesn't".
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    sourcewords = self.filteraccelerators(source).split()
    targetwords = self.filteraccelerators(target).split()
    untranslated = []
    for word in sourcewords:
        if word in self.config.musttranslatewords and word in targetwords:
            untranslated.append(word)
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
891
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the translation

    Both strings are run through C{translate()} with the configured
    C{validcharsmap}; characters that remain in the translation but not in
    the source are reported.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when no map is configured or no such character is found
    @raise FilterFailure: listing each offending character with its code
        point
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    sourceleft = str1.translate(charmap)
    targetleft = str2.translate(charmap)
    invalidchars = []
    for char in targetleft:
        # characters that survive in the source as well are tolerated
        if char not in sourceleft:
            invalidchars.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if invalidchars:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
    return True
902
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every whitespace-delimited word of the source (after
    C{filteraccelerators}) that starts with C{/} is checked with
    C{helpers.countsmatch} against the unfiltered source and translation.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: False as soon as one such word fails C{helpers.countsmatch},
        True otherwise
    """
    for word in self.filteraccelerators(str1).split():
        if word.startswith(u"/") and not helpers.countsmatch(str1, str2, (word,)):
            return False
    return True
910
def xmltags(self, str1, str2):
    """checks that XML/HTML tags have not been translated

    Tags are found with C{tag_re}.  Their properties, as extracted by
    C{tagproperties} with the configured C{ignoretags}, are run through
    C{intuplelist} with the configured C{canchangetags} and compared.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when the tags are considered equivalent, False otherwise
    """
    sourcetags = tag_re.findall(str1)
    if not sourcetags:
        # No tags in str1, let's just check that none were added in str2. This
        # might be useful for fuzzy strings wrongly unfuzzied, for example.
        return not tag_re.findall(str2)
    firsttag = sourcetags[0]
    # a source consisting of one single tag without attributes always passes
    if len(firsttag) == len(str1) and u"=" not in firsttag:
        return True
    targettags = tag_re.findall(str2)
    sourceproperties = tagproperties(sourcetags, self.config.ignoretags)
    targetproperties = tagproperties(targettags, self.config.ignoretags)
    changeable = self.config.canchangetags
    sourcefiltered = [intuplelist(prop, changeable) for prop in sourceproperties]
    targetfiltered = [intuplelist(prop, changeable) for prop in targetproperties]
    #TODO: consider the consequences of different ordering of attributes/tags
    return sourcefiltered == targetfiltered
937
def kdecomments(self, str1, str2):
    """checks to ensure that no KDE style comments appear in the translation

    @param str1: the source string (not used by this check)
    @param str2: the target (translated) string
    @return: False when the translation starts with C{_:} or has C{_:}
        directly after a newline, True otherwise
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
941
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    @param str1: the source string (not used by this check)
    @param str2: the target (translated) string
    @return: True when the conflict marker is absent from the translation
    """
    return u"#-#-#-#-#" not in str2
945
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Counts occurrences of a literal C{(s)} in both strings.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: when the target language's C{nplurals} is 1, whether the
        translation has no C{(s)} at all; otherwise whether both strings
        have the same number of them
    """
    def numberofpatterns(text, patterns):
        """Total number of matches of all the given patterns in text."""
        return sum(len(re.findall(pattern, text)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the pattern must
    # not depend on the backslash surviving string-literal processing.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        # a language with a single plural form has no use for "(s)"
        return not targetcount
    return sourcecount == targetcount
961
def spellcheck(self, str1, str2):
    """checks words that don't pass a spell check

    Words that C{spelling.check} reports for the translation are ignored
    when they are configured in C{notranslatewords}, are also reported for
    the source (checked with lang "en"), or occur among their own
    suggestions.

    @param str1: the source string
    @param str2: the target (translated) string
    @return: True when no target language is configured, when spell checking
        is unavailable, or when nothing is left to report
    @raise FilterFailure: with one message per suspect word, each offering
        at most five suggestions
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    sourceflagged = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        # the last test is a hack to ignore hyphenation rules
        if word in self.config.notranslatewords or word in sourceflagged or word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
987
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal translations.

    @param str1: the source string
    @param str2: the target (translated) string (not used by this check)
    @return: False when the source is one of the configured
        C{credit_sources}, True otherwise
    """
    return str1 not in self.config.credit_sources
# Maps a precondition test to the tests that depend on it: if the
# precondition filter is run and fails then the other tests listed are ignored.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The "startcaps" entry below is disabled. It has been
                 # causing problems since Python 2.6, as startcaps is now
                 # seen as an important test to always execute and could now
                 # be run before it is blocked by a failing "untranslated" or
                 # "blank" test. This is probably due to a slightly different
                 # implementation of the internal dict handling since
                 # Python 2.6. We should never have relied on this ordering
                 # anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace":("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf") }

# code to actually run the tests (use unittest?)

# Settings that OpenOfficeChecker passes to checkerconfig.update(): the "~"
# accelerator marker, the variable delimiter pairs, tag properties to ignore
# and tag properties that are allowed to change.
openofficeconfig = CheckerConfig(
    accelmarkers = ["~"],
    varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)],
    canchangetags = [("link", "name", None)]
    )
class OpenOfficeChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with openofficeconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with openofficeconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that MozillaChecker passes to checkerconfig.update(): the "&"
# accelerator marker, the variable delimiter pairs, and "accelerators" as a
# critical test.
mozillaconfig = CheckerConfig(
    accelmarkers = ["&"],
    varmatches = [("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None), ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests = ["accelerators"]
    )
class MozillaChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with mozillaconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with mozillaconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """checks for messages containing translation credits instead of normal translations.

        @return: False when one of C{self.locations} is a known credits
            location, True otherwise
        """
        creditlocations = ['MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation']
        for location in self.locations:
            if location in creditlocations:
                return False
        return True
# Settings that DrupalChecker passes to checkerconfig.update(): variables
# introduced by "%", "@" or "!".
drupalconfig = CheckerConfig(
    varmatches = [("%", None), ("@", None), ("!", None)],
    )
class DrupalChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with drupalconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with drupalconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that GnomeChecker passes to checkerconfig.update(): the "_"
# accelerator marker, the variable delimiter pairs, and the source string
# that marks a translator-credits message.
gnomeconfig = CheckerConfig(
    accelmarkers = ["_"],
    varmatches = [("%", 1), ("$(", ")")],
    credit_sources = [u"translator-credits"]
    )
class GnomeChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with gnomeconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with gnomeconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of C{self.locations} contains C{schemas.in}.

        @raise FilterFailure: listing the gconf attributes of the source that
            do not occur in the translation
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            # each match is looked up without its first and last character
            translated = [attribute for attribute in attributes if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
# Settings that KdeChecker passes to checkerconfig.update(): the "&"
# accelerator marker, the variable delimiter pair, and the source strings
# that mark translator-credits messages.
kdeconfig = CheckerConfig(
    accelmarkers = ["&"],
    varmatches = [("%", 1)],
    credit_sources = [u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"]
    )
class KdeChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with kdeconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with kdeconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        # TODO allow setup of KDE plural and translator comments so that they
        # do not create false positives
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Settings that CCLicenseChecker passes to checkerconfig.update(): variables
# delimited by "@" on both sides.
cclicenseconfig = CheckerConfig(varmatches = [("@", "@")])
class CCLicenseChecker(StandardChecker):
    """A StandardChecker whose configuration is updated with cclicenseconfig."""

    def __init__(self, **kwargs):
        """Update the given (or a fresh) checkerconfig with cclicenseconfig.

        @keyword checkerconfig: optional CheckerConfig to use; it is updated
            in place.  All keyword arguments are handed on to
            StandardChecker.
        """
        checkerconfig = kwargs.get("checkerconfig")
        if checkerconfig is None:
            # nothing usable was supplied: start from a default configuration
            checkerconfig = kwargs["checkerconfig"] = CheckerConfig()
        checkerconfig.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project name to the checker class to use for it.  Note that "wx"
# reuses KdeChecker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        @return: False for a fuzzy unit, True otherwise
        """
        return not unit.isfuzzy()

    def isreview(self, unit):
        """Check if the unit has been marked review.

        @return: False for a unit marked for review, True otherwise
        """
        return not unit.isreview()

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations.

        @return: for a plural unit, whether the number of target strings
            equals the target language's C{nplurals}; True for other units
            and when C{nplurals} is not greater than zero
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions come from C{self.suggestion_store} when one is set,
        otherwise from the alternative translations of an XLIFF unit.

        @return: False when at least one suggestion is found, True otherwise
        """
        # make sure the attribute exists even if it was never assigned
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        if self.suggestion_store:
            suggestions = self.suggestion_store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1189 1190
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Runs a StandardChecker over a unit built from the two strings and prints
    one line per failing test.

    @param str1: the source string
    @param str2: the target (translated) string
    @param ignorelist: passed to StandardChecker as C{excludefilters}
    @return: the failures reported by C{run_filters}, keyed by test
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        print("failure: %s: %s\n %r\n %r" % (test, failures[test], source, target))
    return failures
1203
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Every pair is handed to L{runtests}; a summary of how many pairs passed
    is printed at the end.

    @param pairs: a sequence of (source, target) string pairs
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures it found, so a pair has passed only
        # when that result is empty
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
if __name__ == '__main__':
    # When run as a script, push a small sample of (source, target) pairs
    # through batchruntests.
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" ")
              ]
    batchruntests(testset)