Package translate :: Package tools :: Module pocount
[hide private]
[frames] | no frames]

Source Code for Module translate.tools.pocount

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2003-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Create string and word counts for supported localization files including: 
 22  XLIFF, TMX, Gettext PO and MO, Qt .ts and .qm, Wordfast TM, etc. 
 23   
 24  See: http://translate.sourceforge.net/wiki/toolkit/pocount for examples and 
 25  usage instructions 
 26  """ 
 27   
 28  from translate.storage import factory 
 29  from translate.storage import statsdb 
 30  from optparse import OptionParser 
 31  import sys 
 32  import os 
 33   
 34  # define style constants 
 35  style_full, style_csv, style_short_strings, style_short_words = range(4) 
 36   
 37  # default output style 
 38  default_style = style_full 
 39   
 40   
def calcstats_old(filename):
    """Compute translation statistics for filename by walking its units.

    This is the previous implementation of calcstats() and is left for
    comparison and debugging purposes.

    @param filename: name of the file handed to factory.getobject()
    @rtype: dict
    @return: unit counts and word counts keyed by statistic name, or an
             empty dict when factory.getobject() raises ValueError (the
             error text is printed in that case)
    """
    try:
        store = factory.getobject(filename)
    except ValueError:
        print(str(sys.exc_info()[1]))
        return {}

    # Header units never take part in the statistics.
    units = [unit for unit in store.units if not unit.isheader()]
    translated = translatedmessages(units)
    fuzzy = fuzzymessages(units)
    review = [unit for unit in units if unit.isreview()]
    untranslated = untranslatedmessages(units)

    # Count the words of every unit once; index 0 of each entry is used as
    # the source word count and index 1 as the target word count.
    wordcounts = dict([(unit, statsdb.wordsinunit(unit)) for unit in units])

    def sourcewords(elementlist):
        return sum([wordcounts[unit][0] for unit in elementlist])

    def targetwords(elementlist):
        return sum([wordcounts[unit][1] for unit in elementlist])

    stats = {
        # units
        "translated": len(translated),
        "fuzzy": len(fuzzy),
        "untranslated": len(untranslated),
        "review": len(review),
        # words
        "translatedsourcewords": sourcewords(translated),
        "translatedtargetwords": targetwords(translated),
        "fuzzysourcewords": sourcewords(fuzzy),
        "untranslatedsourcewords": sourcewords(untranslated),
        "reviewsourcewords": sourcewords(review),
    }
    # The totals are the sum of the translated, fuzzy and untranslated
    # figures; review figures are reported separately and not added in.
    stats["total"] = (stats["translated"] + stats["fuzzy"] +
                      stats["untranslated"])
    stats["totalsourcewords"] = (stats["translatedsourcewords"] +
                                 stats["fuzzysourcewords"] +
                                 stats["untranslatedsourcewords"])
    return stats
77 78
def calcstats(filename):
    """Return the totals that statsdb.StatsCache reports for filename.

    @param filename: name of the file passed to StatsCache.filetotals()
    @return: whatever StatsCache.filetotals() returns for the file
    """
    return statsdb.StatsCache().filetotals(filename)
82 83
def summarize(title, stats, style=style_full, indent=8, incomplete_only=False):
    """Print summary for a .po file in specified format.

    @param title: name of .po file
    @param stats: mapping with translation statistics for the file specified
    @param style: one of the style_* constants; any value other than
                  style_csv, style_short_strings or style_short_words gives
                  the full format
    @param indent: indentation of the 2nd column (length of longest filename)
    @param incomplete_only: omit fully translated files
    @type incomplete_only: Boolean
    @rtype: int
    @return: 1 if counting incomplete files (incomplete_only=True) and the
             file is completely translated, 0 otherwise
    """

    def percent(part, whole):
        """Return part as a whole-number percentage of whole (0 if empty)."""
        if whole == 0:
            return 0
        return part * 100 // whole

    if incomplete_only and stats["total"] == stats["translated"]:
        # Nothing is printed for a fully translated file in this mode.
        return 1

    if style == style_csv:
        fields = [
            "%s, " % title,
            "%d, %d, %d," % (stats["translated"],
                             stats["translatedsourcewords"],
                             stats["translatedtargetwords"]),
            "%d, %d," % (stats["fuzzy"], stats["fuzzysourcewords"]),
            "%d, %d," % (stats["untranslated"],
                         stats["untranslatedsourcewords"]),
            "%d, %d" % (stats["total"], stats["totalsourcewords"]),
        ]
        if stats["review"] > 0:
            # The key used to be misspelt "reviewsourdcewords", which raised
            # KeyError for every file that had units marked for review.
            fields.append(", %d, %d" % (stats["review"],
                                        stats["reviewsourcewords"]))
        # The fields were originally emitted by consecutive comma-terminated
        # print statements, which separate their output by a single space.
        print(" ".join(fields))
    elif style == style_short_strings:
        spaces = " " * (indent - len(title))
        print("%s%s strings: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (
            title, spaces,
            stats["total"], stats["translated"], stats["fuzzy"],
            stats["untranslated"],
            percent(stats["translated"], stats["total"]),
            percent(stats["fuzzy"], stats["total"]),
            percent(stats["untranslated"], stats["total"])))
    elif style == style_short_words:
        spaces = " " * (indent - len(title))
        print("%s%s source words: total: %d\t| %dt\t%df\t%du\t| %d%%t\t%d%%f\t%d%%u" % (
            title, spaces,
            stats["totalsourcewords"], stats["translatedsourcewords"],
            stats["fuzzysourcewords"], stats["untranslatedsourcewords"],
            percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
            percent(stats["fuzzysourcewords"], stats["totalsourcewords"]),
            percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])))
    else:  # style == style_full
        print(title)
        print("type strings words (source) words (translation)")
        print("translated: %5d (%3d%%) %10d (%3d%%) %15d" % (
            stats["translated"],
            percent(stats["translated"], stats["total"]),
            stats["translatedsourcewords"],
            percent(stats["translatedsourcewords"], stats["totalsourcewords"]),
            stats["translatedtargetwords"]))
        print("fuzzy: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["fuzzy"],
            percent(stats["fuzzy"], stats["total"]),
            stats["fuzzysourcewords"],
            percent(stats["fuzzysourcewords"], stats["totalsourcewords"])))
        print("untranslated: %5d (%3d%%) %10d (%3d%%) n/a" % (
            stats["untranslated"],
            percent(stats["untranslated"], stats["total"]),
            stats["untranslatedsourcewords"],
            percent(stats["untranslatedsourcewords"], stats["totalsourcewords"])))
        print("Total: %5d %17d %22d" % (
            stats["total"],
            stats["totalsourcewords"],
            stats["translatedtargetwords"]))
        if stats["review"] > 0:
            print("review: %5d %17d n/a" % (
                stats["review"], stats["reviewsourcewords"]))
        # Blank line after each full-format summary.
        print("")
    return 0
def fuzzymessages(units):
    """Select the units that are fuzzy and have a non-empty target.

    @param units: iterable of units offering isfuzzy() and a target attribute
    @return: the result of filter() over units
    """
    def is_fuzzy_with_target(unit):
        return unit.isfuzzy() and unit.target

    return filter(is_fuzzy_with_target, units)
162 163
def translatedmessages(units):
    """Select the units whose istranslated() is true.

    @param units: iterable of units offering istranslated()
    @return: the result of filter() over units
    """
    def is_translated(unit):
        return unit.istranslated()

    return filter(is_translated, units)
166 167
def untranslatedmessages(units):
    """Select the units that are neither translated nor fuzzy and have a source.

    @param units: iterable of units offering istranslated(), isfuzzy() and
                  a source attribute
    @return: the result of filter() over units
    """
    def is_untranslated(unit):
        if unit.istranslated() or unit.isfuzzy():
            return False
        return unit.source

    return filter(is_untranslated, units)
170 171
class summarizer:
    """Print statistics for a list of files and directories.

    All the work is done by the constructor: every name is processed in
    turn (directories recursively), a summary is printed per file through
    summarize(), and running totals are kept in self.totals.
    """

    def __init__(self, filenames, style=default_style, incomplete_only=False):
        """Process filenames and print their statistics.

        @param filenames: file and directory names to process
        @param style: one of the style_* constants selecting the output format
        @param incomplete_only: passed on to summarize() for every file
        """
        self.totals = {}
        self.filecount = 0
        self.longestfilename = 0
        self.style = style
        self.incomplete_only = incomplete_only
        self.complete_count = 0

        if self.style == style_csv:
            print("Filename, Translated Messages, Translated Source Words, Translated "
                  "Target Words, Fuzzy Messages, Fuzzy Source Words, Untranslated Messages, "
                  "Untranslated Source Words, Total Message, Total Source Words, "
                  "Review Messages, Review Source Words")
        if self.style in (style_short_strings, style_short_words):
            # The short formats pad each title to the longest name given on
            # the command line.
            for filename in filenames:
                self.longestfilename = max(self.longestfilename, len(filename))

        for filename in filenames:
            if not os.path.exists(filename):
                sys.stderr.write("cannot process %s: does not exist\n" % filename)
            elif os.path.isdir(filename):
                self.handledir(filename)
            else:
                self.handlefile(filename)

        if self.filecount > 1 and self.style == style_full:
            # NOTE(review): the nesting of the two trailing prints below was
            # reconstructed from a flattened listing -- confirm that the plain
            # "File count" line is meant to appear in both modes.
            if self.incomplete_only:
                summarize("TOTAL (incomplete only):", self.totals,
                          incomplete_only=True)
                print("File count (incomplete): %5d" %
                      (self.filecount - self.complete_count))
            else:
                summarize("TOTAL:", self.totals, incomplete_only=False)
            print("File count: %5d" % (self.filecount))
            print("")

    def updatetotals(self, stats):
        """Update self.totals with the statistics in stats."""
        for key in stats:
            self.totals[key] = self.totals.get(key, 0) + stats[key]

    def handlefile(self, filename):
        """Compute, accumulate and print the statistics of a single file.

        Any error raised while doing so is reported on stderr and the file
        is not counted in self.filecount.
        """
        try:
            stats = calcstats(filename)
            # Totals are accumulated before summarize() applies the
            # incomplete_only filter, so they cover every readable file.
            self.updatetotals(stats)
            self.complete_count += summarize(filename, stats, self.style,
                                             self.longestfilename,
                                             self.incomplete_only)
            self.filecount += 1
        except Exception:
            # This happens if we have a broken file.  Catching Exception
            # rather than everything keeps Ctrl-C and sys.exit() working.
            sys.stderr.write("%s\n" % (sys.exc_info()[1],))

    def handlefiles(self, dirname, filenames):
        """Process the entries filenames found inside directory dirname."""
        for filename in filenames:
            pathname = os.path.join(dirname, filename)
            if os.path.isdir(pathname):
                self.handledir(pathname)
            else:
                self.handlefile(pathname)

    def handledir(self, dirname):
        """Process every entry of dirname, skipping version-control folders."""
        name = os.path.basename(dirname)
        if name in ["CVS", ".svn", "_darcs", ".git", ".hg", ".bzr"]:
            return
        self.handlefiles(dirname, os.listdir(dirname))
241 242
def main():
    """Command-line entry point: parse the options and print the statistics.

    Accepts --incomplete plus at most one output style out of --full (the
    default), --csv, --short / --short-strings and --short-words.  Asking
    for more than one distinct style is reported through the option parser.
    The remaining arguments are handed to summarizer as the names to process.
    """
    parser = OptionParser(usage="usage: %prog [options] po-files")
    parser.add_option("--incomplete", action="store_true", default=False,
                      dest="incomplete_only",
                      help="skip 100% translated files.")
    # options controlling output format:
    parser.add_option("--full", action="store_true", default=False,
                      dest="style_full",
                      help="(default) statistics in full, verbose format")
    parser.add_option("--csv", action="store_true", default=False,
                      dest="style_csv",
                      help="statistics in CSV format")
    parser.add_option("--short", action="store_true", default=False,
                      dest="style_short_strings",
                      help="same as --short-strings")
    parser.add_option("--short-strings", action="store_true", default=False,
                      dest="style_short_strings",
                      help="statistics of strings in short format - one line per file")
    parser.add_option("--short-words", action="store_true", default=False,
                      dest="style_short_words",
                      help="statistics of words in short format - one line per file")

    (options, args) = parser.parse_args()

    # Collect the style of every flag that was given; the flags are plain
    # booleans, the style value itself comes from this table.
    requested = [value for (flag, value) in [
        (options.style_full, style_full),
        (options.style_csv, style_csv),
        (options.style_short_strings, style_short_strings),
        (options.style_short_words, style_short_words),
    ] if flag]

    if len(requested) > 1:
        # parser.error() prints the usage message and exits with status 2,
        # so no explicit sys.exit() is needed afterwards.
        parser.error("options --full, --csv, --short-strings and --short-words are mutually exclusive")

    if requested:
        style = requested[0]
    else:
        style = default_style  # default output style

    # psyco is an optional accelerator; carry on without it when it is
    # missing or cannot be enabled.
    try:
        import psyco
        psyco.full()
    except Exception:
        pass

    summarizer(args, style, options.incomplete_only)
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()