Package translate :: Package tools :: Module pretranslate
[hide private]
[frames | no frames]

Source Code for Module translate.tools.pretranslate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Fill localization files with suggested translations based on 
 22  translation memory and existing translations. 
 23  """ 
 24   
 25  from translate.storage import factory 
 26  from translate.storage import xliff 
 27  from translate.search import match 
 28   
 29  # We don't want to reinitialise the TM each time, so let's store it here. 
 30  tmmatcher = None 
 31   
32 -def memory(tmfiles, max_candidates=1, min_similarity=75, max_length=1000):
33 """Returns the TM store to use. Only initialises on first call.""" 34 global tmmatcher 35 # Only initialise first time 36 if tmmatcher is None: 37 if isinstance(tmfiles, list): 38 tmstore = [factory.getobject(tmfile) for tmfile in tmfiles] 39 else: 40 tmstore = factory.getobject(tmfiles) 41 tmmatcher = match.matcher(tmstore, max_candidates=max_candidates, min_similarity=min_similarity, max_length=max_length) 42 return tmmatcher
43 44
def pretranslate_file(input_file, output_file, template_file, tm=None, min_similarity=75, fuzzymatching=True):
    """Pretranslate one file using old translations and translation memory.

    ``input_file`` and, when given, ``template_file`` are loaded with
    ``factory.getobject``.  The stores are handed to ``pretranslate_store``
    together with ``tm``, ``min_similarity`` and ``fuzzymatching``, and the
    string form of the result is written to ``output_file``.

    Always returns 1.
    """
    source_store = factory.getobject(input_file)

    # The template is optional; without one only the TM can supply matches.
    if template_file is None:
        old_store = None
    else:
        old_store = factory.getobject(template_file)

    pretranslated = pretranslate_store(source_store, old_store, tm, min_similarity, fuzzymatching)
    output_file.write(str(pretranslated))
    return 1
55 56
def match_template_id(input_unit, template_store):
    """Return the unit in ``template_store`` that corresponds to ``input_unit``.

    Two strategies are used.  PO files produced by moz2po and oo2po use the
    location as the unique identifier of a string, so they are matched by
    location; everything else is matched by unit id.  The two cases are told
    apart cheaply by looking for a ':' in the first location.

    Returns the matching unit, or ``None`` when location matching finds
    nothing suitable.
    """
    locations = input_unit.getlocations()

    # No locations, or a ':' in the first one: normal gettext-like matching.
    gettext_style = not locations or ":" in locations[0]
    if gettext_style:
        return template_store.findid(input_unit.getid())

    # Location-based matching: take the first location whose template unit has
    # the same source text and a non-empty target.
    # (Open question kept from the original: should units with a matching
    # location but a different source really be discarded?)
    for location in locations:
        candidate = template_store.locationindex.get(location, None)
        if candidate is None:
            continue
        if candidate.source == input_unit.source and candidate.gettargetlen() > 0:
            return candidate

    return None
77 78
def match_fuzzy(input_unit, matchers):
    """Return the best fuzzy match for ``input_unit`` from a queue of matchers.

    The matchers are consulted in order with the unit's source text.  The
    first candidate of the first matcher that yields any candidates is
    returned; ``None`` is returned when no matcher produces a candidate.
    """
    for current_matcher in matchers:
        candidates = current_matcher.matches(input_unit.source)
        if not candidates:
            continue
        return candidates[0]
    return None
85 86
def pretranslate_unit(input_unit, template_store, matchers=None, mark_reused=False):
    """Pretranslate a unit, or return it unchanged if no translation is found.

    A match from ``template_store`` (via ``match_template_id``) with a
    non-empty target is merged into ``input_unit``.  Otherwise, if
    ``matchers`` is given, a fuzzy match (via ``match_fuzzy``) with a
    non-empty target is used: XLIFF units receive it as an alternative
    translation, all other units have it merged in.

    With ``mark_reused`` set, the template unit found for the match's source
    text gets ``reused = True``.

    Returns ``input_unit`` (modified in place where a match was applied).
    """
    # Template matching comes first.
    found = match_template_id(input_unit, template_store) if template_store else None

    if found and found.gettargetlen() > 0:
        input_unit.merge(found, authoritative=True)
    elif matchers:
        # Fall back to fuzzy matching.
        found = match_fuzzy(input_unit, matchers)
        if found and found.gettargetlen() > 0:
            # FIXME: should we dispatch here instead of this crude type check
            if isinstance(input_unit, xliff.xliffunit):
                # FIXME: what about origin, lang and matchquality
                input_unit.addalttrans(found.target, origin="fish", sourcetxt=found.source)
            else:
                input_unit.merge(found, authoritative=True)

    # FIXME: ugly hack required by pot2po to mark old translations reused
    # for the new file.
    if mark_reused and found and template_store:
        previous_unit = template_store.findunit(found.source)
        if previous_unit is not None:
            previous_unit.reused = True

    return input_unit
116
def prepare_template_pofile(template_store):
    """PO-specific template preparation: resurrect every obsolete unit.

    Each unit in ``template_store.units`` that reports itself as obsolete has
    ``resurrect()`` called on it.  Returns ``None``.
    """
    # Open question kept from the original: do we want to consider obsolete
    # translations at all?
    obsolete_units = (unit for unit in template_store.units if unit.isobsolete())
    for obsolete_unit in obsolete_units:
        obsolete_unit.resurrect()
123
def pretranslate_store(input_store, template_store, tm=None, min_similarity=75, fuzzymatching=True):
    """Do the actual pretranslation of a whole store.

    ``template_store`` may be ``None``.  When present it is indexed, given
    format-specific preparation if this module defines a function named
    ``prepare_template_<ClassName>`` for its class, and (with
    ``fuzzymatching``) wrapped in a fuzzy matcher.  When ``tm`` is given and
    ``fuzzymatching`` is on, the shared TM matcher from ``memory`` is queued
    after the template matcher.

    Every translatable unit of ``input_store`` is passed to
    ``pretranslate_unit``.  Returns ``input_store``, whose units have been
    updated in place.
    """
    # preparation
    matchers = []

    # prepare template
    if template_store is not None:
        template_store.makeindex()

        # Template preparation based on type: dispatch by class name.
        # Uses the ``in`` operator rather than dict.has_key(), which only
        # exists on Python 2 dictionaries.
        prepare_template = "prepare_template_%s" % template_store.__class__.__name__
        if prepare_template in globals():
            globals()[prepare_template](template_store)

        if fuzzymatching:
            # create template matcher
            # FIXME: max_length hardcoded
            matcher = match.matcher(template_store, max_candidates=1, min_similarity=min_similarity, max_length=3000, usefuzzy=True)
            matcher.addpercentage = False
            matchers.append(matcher)

    # prepare tm: create tm matcher
    if tm and fuzzymatching:
        # FIXME: max_length hardcoded
        matcher = memory(tm, max_candidates=1, min_similarity=min_similarity, max_length=1000)
        matcher.addpercentage = False
        matchers.append(matcher)

    # main loop; pretranslate_unit modifies each unit in place
    for input_unit in input_store.units:
        if input_unit.istranslatable():
            pretranslate_unit(input_unit, template_store, matchers)

    return input_store
157 158
def main(argv=None):
    """Command-line entry point: build the option parser and run it.

    Registers ``pretranslate_file`` as the handler for pot, po and xlf input
    (with and without a template) and adds the ``--tm``, ``--similarity`` and
    ``--nofuzzymatching`` options, each passed through to the handler.
    """
    from translate.convert import convert

    # Each input format is registered twice: on its own and paired with the
    # template format.
    formats = {}
    for input_format, output_format in (("pot", "po"), ("po", "po"), ("xlf", "xlf")):
        formats[input_format] = (output_format, pretranslate_file)
        formats[(input_format, output_format)] = (output_format, pretranslate_file)

    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        allowmissingtemplate=True,
        description=__doc__,
    )

    parser.add_option(
        "", "--tm",
        dest="tm",
        default=None,
        help="The file to use as translation memory when fuzzy matching",
    )
    parser.passthrough.append("tm")

    defaultsimilarity = 75
    similarity_help = "The minimum similarity for inclusion (default: %d%%)" % defaultsimilarity
    parser.add_option(
        "-s", "--similarity",
        dest="min_similarity",
        default=defaultsimilarity,
        type="float",
        help=similarity_help,
    )
    parser.passthrough.append("min_similarity")

    parser.add_option(
        "--nofuzzymatching",
        dest="fuzzymatching",
        action="store_false",
        default=True,
        help="Disable fuzzy matching",
    )
    parser.passthrough.append("fuzzymatching")

    parser.run(argv)


if __name__ == '__main__':
    main()