22 """Module to provide a cache of statistics in a database.
23
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """

try:
    from sqlite3 import dbapi2
except ImportError:
    from pysqlite2 import dbapi2
import os.path
import re
import sys
import stat
import thread
from UserDict import UserDict

from translate import __version__ as toolkitversion
from translate.lang.common import Common
from translate.misc.multistring import multistring
from translate.storage import factory
from translate.storage.workflow import StateEnum

kdepluralre = re.compile(r"^_n: ")
brtagre = re.compile(r"<br\s*?/?>")
xmltagre = re.compile(r"<[^>]+>")
numberre = re.compile(r"\D\.\D")

extended_state_strings = {
    StateEnum.EMPTY: "empty",
    StateEnum.NEEDS_WORK: "needs-work",
    StateEnum.REJECTED: "rejected",
    StateEnum.NEEDS_REVIEW: "needs-review",
    StateEnum.UNREVIEWED: "unreviewed",
    StateEnum.FINAL: "final",
}

UNTRANSLATED = StateEnum.EMPTY
FUZZY = StateEnum.NEEDS_WORK
TRANSLATED = StateEnum.UNREVIEWED

state_strings = {
    UNTRANSLATED: "untranslated",
    FUZZY: "fuzzy",
    TRANSLATED: "translated",
}


def wordcount(string):
    # Strip KDE plural markers and markup so they do not inflate the count.
    string = kdepluralre.sub("", string)
    string = brtagre.sub("\n", string)
    string = xmltagre.sub("", string)
    string = numberre.sub(" ", string)
    return len(Common.words(string))


def wordsinunit(unit):
83 """Counts the words in the unit's source and target, taking plurals into
84 account. The target words are only counted if the unit is translated."""
85 (sourcewords, targetwords) = (0, 0)
86 if isinstance(unit.source, multistring):
87 sourcestrings = unit.source.strings
88 else:
89 sourcestrings = [unit.source or ""]
90 for s in sourcestrings:
91 sourcewords += wordcount(s)
92 if not unit.istranslated():
93 return sourcewords, targetwords
94 if isinstance(unit.target, multistring):
95 targetstrings = unit.target.strings
96 else:
97 targetstrings = [unit.target or ""]
98 for s in targetstrings:
99 targetwords += wordcount(s)
100 return sourcewords, targetwords
101
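
# Illustrative sketch (not part of the original module): summing the word
# counts reported by wordsinunit() over every translatable unit of a store.
# The path "project/af.po" is only an assumed example; any format supported
# by translate.storage.factory works the same way.
def _example_count_store_words(path="project/af.po"):
    store = factory.getobject(path)
    sourcewords, targetwords = 0, 0
    for unit in store.units:
        if unit.istranslatable():
            unitsource, unittarget = wordsinunit(unit)
            sourcewords += unitsource
            targetwords += unittarget
    return sourcewords, targetwords
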

class Record(UserDict):

    def __init__(self, record_keys, record_values=None, compute_derived_values=lambda x: x):
        if record_values is None:
            record_values = (0,) * len(record_keys)
        UserDict.__init__(self, dict(zip(record_keys, record_values)))
        self.record_keys = record_keys
        self.compute_derived_values = compute_derived_values
        self.compute_derived_values(self)

    def to_tuple(self):
        return tuple(self[key] for key in self.record_keys)

    def __add__(self, other):
        result = Record(self.record_keys, compute_derived_values=self.compute_derived_values)
        for key in self.record_keys:
            result[key] = self[key] + other[key]
        result.compute_derived_values(result)
        return result

    def __sub__(self, other):
        result = Record(self.record_keys, compute_derived_values=self.compute_derived_values)
        for key in self.record_keys:
            result[key] = self[key] - other[key]
        result.compute_derived_values(result)
        return result

    def as_string_for_db(self):
        return ",".join([repr(x) for x in self.to_tuple()])


def transaction(f):
135 """Modifies f to commit database changes if it executes without exceptions.
136 Otherwise it rolls back the database.
137
138 ALL publicly accessible methods in StatsCache MUST be decorated with this
139 decorator.
140 """
141
142 def decorated_f(self, *args, **kwargs):
143 try:
144 result = f(self, *args, **kwargs)
145 self.con.commit()
146 return result
147 except:
            # If any exception escapes f, the database may be left in an
            # inconsistent state, so roll back everything done in this
            # transaction before re-raising.
            if self.con:
                self.con.rollback()
            raise
    return decorated_f

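
# Illustrative sketch (not part of the original module) of what @transaction
# provides: any object with "con" and "cur" attributes gets commit-on-success
# and rollback-on-error behaviour for the decorated method.  The class and
# method names here are hypothetical.
# e.g. _example_transaction_usage(dbapi2.connect(":memory:"))
def _example_transaction_usage(connection):
    class _ExampleConsumer(object):

        def __init__(self, con):
            self.con = con
            self.cur = con.cursor()

        @transaction
        def record_example(self):
            self.cur.execute("CREATE TABLE IF NOT EXISTS example(n INTEGER);")
            self.cur.execute("INSERT INTO example VALUES (1);")
            # An exception raised here would trigger self.con.rollback();
            # a clean return triggers self.con.commit().

    consumer = _ExampleConsumer(connection)
    consumer.record_example()
    return consumer
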

class FileTotals(object):
    keys = ['translatedsourcewords',
            'fuzzysourcewords',
            'untranslatedsourcewords',
            'translated',
            'fuzzy',
            'untranslated',
            'translatedtargetwords']

    def __init__(self, cur):
        self.cur = cur
        self.cur.execute("""
            CREATE TABLE IF NOT EXISTS filetotals(
                fileid                  INTEGER PRIMARY KEY AUTOINCREMENT,
                translatedsourcewords   INTEGER NOT NULL,
                fuzzysourcewords        INTEGER NOT NULL,
                untranslatedsourcewords INTEGER NOT NULL,
                translated              INTEGER NOT NULL,
                fuzzy                   INTEGER NOT NULL,
                untranslated            INTEGER NOT NULL,
                translatedtargetwords   INTEGER NOT NULL);""")

    def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
        record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
        if state_for_db is not None:
            if state_for_db is UNTRANSLATED:
                record['untranslated'] = 1
                record['untranslatedsourcewords'] = sourcewords
            if state_for_db is TRANSLATED:
                record['translated'] = 1
                record['translatedsourcewords'] = sourcewords
                record['translatedtargetwords'] = targetwords
            elif state_for_db is FUZZY:
                record['fuzzy'] = 1
                record['fuzzysourcewords'] = sourcewords
        return record
    new_record = classmethod(new_record)

209 record["total"] = record["untranslated"] + \
210 record["translated"] + \
211 record["fuzzy"]
212 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
213 record["translatedsourcewords"] + \
214 record["fuzzysourcewords"]
215 record["review"] = 0
216 _compute_derived_values = classmethod(_compute_derived_values)
217
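    # Illustrative sketch (not part of the original module): per-unit records
    # built by new_record() are summed into a file total, and the derived
    # keys are recomputed along the way.  The word counts below are made-up
    # example numbers.
    #
    #     unit_a = FileTotals.new_record(TRANSLATED, sourcewords=4, targetwords=5)
    #     unit_b = FileTotals.new_record(FUZZY, sourcewords=3)
    #     combined = unit_a + unit_b
    #     combined["total"]             # 2
    #     combined["totalsourcewords"]  # 7
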

    def __getitem__(self, fileid):
        result = self.cur.execute("""
            SELECT %(keys)s
            FROM filetotals
            WHERE fileid=?;""" % {'keys': ",".join(FileTotals.keys)}, (fileid,))
        return Record(FileTotals.keys, result.fetchone(), self._compute_derived_values)

    def __setitem__(self, fileid, record):
        self.cur.execute("""
            INSERT OR REPLACE into filetotals
            VALUES (%(fileid)d, %(vals)s);
            """ % {'fileid': fileid, 'vals': record.as_string_for_db()})

    def __delitem__(self, fileid):
        self.cur.execute("""
            DELETE FROM filetotals
            WHERE fileid=?;
            """, (fileid,))

239 """Returns a dictionary with all statistics initalised to 0."""
240 return FileTotals.new_record()
241

def emptyfilechecks():
    return {}


def emptyfilestats():
248 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
249
252 return {"sourcewordcount": [], "targetwordcount": []}
253

def statefordb(unit):
    """Return the numeric database state for the given unit."""
    if unit.istranslated():
        return TRANSLATED
    if unit.isfuzzy() and unit.target:
        return FUZZY
    return UNTRANSLATED


def get_mod_info(file_path):
    file_stat = os.stat(file_path)
    assert not stat.S_ISDIR(file_stat.st_mode)
    return file_stat.st_mtime, file_stat.st_size

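
# Illustrative sketch (not part of the original module): the (st_mtime,
# st_size) pair returned by get_mod_info() is compared with the pair cached
# in the files table to decide whether a file's statistics must be
# recalculated.  "cached_mod_info" stands in for the cached database values.
def _example_needs_refresh(file_path, cached_mod_info):
    return get_mod_info(file_path) != cached_mod_info
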

def suggestion_extension():
    return os.path.extsep + 'pending'


def suggestion_filename(filename):
    return filename + suggestion_extension()

279 """An object instantiated as a singleton for each statsfile that provides
280 access to the database cache from a pool of StatsCache objects."""
281 _caches = {}
282 defaultfile = None
283 con = None
284 """This cache's connection"""
285 cur = None
286 """The current cursor"""

    def __new__(cls, statsfile=None):
        current_thread = thread.get_ident()

        def make_database(statsfile):

            def connect(cache):
                cache.con = dbapi2.connect(statsfile)
                cache.cur = cache.con.cursor()

            def clear_old_data(cache):
                try:
                    cache.cur.execute("""SELECT toolkitbuild FROM files""")
                    val = cache.cur.fetchone()
                    # A database written by an older toolkit build may have a
                    # stale layout, so discard the whole file and start over.
                    if val is None or val[0] < toolkitversion.build:
                        cache.con.close()
                        del cache
                        os.unlink(statsfile)
                        return True
                    return False
                except dbapi2.OperationalError:
                    return False

            cache = cls._caches.setdefault(current_thread, {})[statsfile] = object.__new__(cls)
            connect(cache)
            if clear_old_data(cache):
                connect(cache)
            cache.create()
            return cache

        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                cachedir = None
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)

        if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
            return cls._caches[current_thread][statsfile]

        return make_database(statsfile)

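    # Illustrative sketch (not part of the original module): within a single
    # thread, constructing StatsCache twice for the same statsfile hands back
    # the same pooled object, and omitting statsfile uses the per-user
    # default cache location chosen above.
    #
    #     cache = StatsCache()                       # default stats.db
    #     same = StatsCache() is cache               # True in this thread
    #     other = StatsCache("/tmp/other-stats.db")  # separate pooled cache
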
    @transaction
    def create(self):
        """Create all tables and indexes."""
        self.file_totals = FileTotals(self.cur)

        self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            st_mtime INTEGER NOT NULL,
            st_size INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""")

        self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            e_state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""")

    @transaction
    def _getfileid(self, filename, check_mod_info=True, store=None):
389 """return fileid representing the given file in the statscache.
390
391 if file not in cache or has been updated since last record
392 update, recalculate stats.
393
394 optional argument store can be used to avoid unnessecary
395 reparsing of already loaded translation files.
396
397 store can be a TranslationFile object or a callback that returns one.
398 """
        if isinstance(filename, str):
            filename = unicode(filename, sys.getfilesystemencoding())
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
            WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mod_info = get_mod_info(realpath)
        if filerow:
            fileid = filerow[0]
            if not check_mod_info:
                # Update the cached mod_info without reparsing the file.
                self.cur.execute("""UPDATE files
                    SET st_mtime=?, st_size=?
                    WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
                return fileid
            if (filerow[1], filerow[2]) == mod_info:
                return fileid

        # The file is not cached yet, or it has changed on disk, so (re)parse
        # it and recalculate its statistics.
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(realpath)

        return self._cachestore(store, realpath, mod_info)

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before."""
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        else:
            return configrow[0]

    @transaction
    def _cacheunitstats(self, units, fileid, unitindex=None, file_totals_record=FileTotals.new_record()):
        """Cache the statistics for the supplied unit(s)."""
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                if unitindex:
                    index = unitindex

                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   statefordb(unit),
                                   unit.get_state_id()))
                file_totals_record = file_totals_record + FileTotals.new_record(statefordb(unit), sourcewords, targetwords)

        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state, e_state)
            values (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.file_totals[fileid] = file_totals_record
        if unitindex:
            return state_strings[statefordb(units[0])]
        return ""

    @transaction
    def _cachestore(self, store, realpath, mod_info):
        """Calculates and caches the statistics of the given store
        unconditionally."""
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
            (realpath, mod_info[0], mod_info[1], toolkitversion.build))
        fileid = self.cur.lastrowid
        self.cur.execute("""DELETE FROM units WHERE
            fileid=?""", (fileid,))
        self._cacheunitstats(store.units, fileid)
        return fileid

    def file_extended_totals(self, filename, store=None):
        stats = {}
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT e_state, COUNT(id), SUM(sourcewords), SUM(targetwords)
            FROM units WHERE fileid=? GROUP BY e_state""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[extended_state_strings[value[0]]] = {
                "units": value[1],
                "sourcewords": value[2],
                "targetwords": value[3],
            }
        return stats

    def filetotals(self, filename, store=None, extended=False):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to _cachestore()."""
        stats = self.file_totals[self._getfileid(filename, store=store)]
        if extended:
            stats["extended"] = self.file_extended_totals(filename, store=store)
        return stats

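    # Illustrative sketch (not part of the original module) of the record
    # returned by filetotals(): its keys are FileTotals.keys plus the derived
    # "total", "totalsourcewords" and "review" values.  The path is only an
    # assumed example.
    #
    #     totals = StatsCache().filetotals("project/af.po")
    #     totals["translated"], totals["fuzzy"], totals["untranslated"]
    #     totals["totalsourcewords"]
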
    @transaction
    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for _cachestorechecks() and recacheunit()"""
        # The dummy row marks that checks have been run for this file and
        # configuration, even if no unit produced an error.
        dummy = (-1, fileid, configid, "noerror", "")
        unitvalues = [dummy]

        errornames = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                # Take the caller's index when checking a single unit.
                if unitindex:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.iteritems():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        checker.setsuggestionstore(None)

        if unitindex:
            # When rechecking a single unit, the "noerror" marker row for the
            # file is already in the database, so don't insert it again.
            unitvalues.remove(dummy)
            errornames.append("total")

        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        return errornames

    @transaction
    def _cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally."""
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def get_unit_stats(self, fileid, unitid):
        values = self.cur.execute("""
            SELECT state, sourcewords, targetwords
            FROM units
            WHERE fileid=? AND unitid=?
        """, (fileid, unitid))
        result = values.fetchone()
        if result is not None:
            return result
        else:
            print >> sys.stderr, """WARNING: Database in an inconsistent state.
            fileid %d and unitid %s have no entries in the table units.""" % (fileid, unitid)
            # Returning an empty sequence means the caller builds an empty
            # record, so the file totals remain unchanged.
            return []

    @transaction
    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file totals,
        checks, checker config, etc.)."""
        fileid = self._getfileid(filename, check_mod_info=False)
        configid = self._get_config_id(fileid, checker)
        unitid = unit.getid()

        totals_without_unit = self.file_totals[fileid] - \
            FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]

        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

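    # Illustrative sketch (not part of the original module): after an editor
    # changes a single unit, recacheunit() refreshes just that unit's rows
    # instead of reparsing the whole file.  checker is assumed to be a
    # translate.filters.checks checker such as StandardUnitChecker().
    #
    #     cache = StatsCache()
    #     unit.target = u"nuwe vertaling"
    #     cache.recacheunit("project/af.po", checker, unit)
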
    def _checkerrors(self, filename, fileid, configid, checker, store):

        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchone(), self.cur

        first, cur = geterrors()
        if first is not None:
            return first, cur

        # No cached errors exist yet for this file and configuration, so run
        # the checks now and store the results.
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(filename)

        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        self._cachestorechecks(fileid, store, checker, configid)
        return geterrors()

    def _geterrors(self, filename, fileid, configid, checker, store):
        result = []
        first, cur = self._checkerrors(filename, fileid, configid, checker, store)
        result.append(first)
        result.extend(cur.fetchall())
        return result

    @transaction
    def _get_config_id(self, fileid, checker):
        configid = self._getstoredcheckerconfig(checker)
        if configid:
            return configid
        self.cur.execute("""INSERT INTO checkerconfigs
            (configid, config) values (NULL, ?);""",
            (str(checker.config.__dict__),))
        return self.cur.lastrowid

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to _cachestorechecks()."""
        fileid = self._getfileid(filename, store=store)
        configid = self._get_config_id(fileid, checker)
        values = self._geterrors(filename, fileid, configid, checker, store)

        errors = emptyfilechecks()
        for value in values:
            if value[1] == -1:
                continue
            checkkey = 'check-' + value[0]
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(value[1])

        return errors

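    # Illustrative sketch (not part of the original module) of the shape of
    # filechecks() output: check names map to the indices of the units that
    # fail them.  The check names and indices are made-up example values.
    #
    #     cache.filechecks("project/af.po", checker)
    #     # {"check-untranslated": [2, 7], "check-endpunc": [5]}
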
    def file_fails_test(self, filename, checker, name):
        fileid = self._getfileid(filename)
        configid = self._get_config_id(fileid, checker)
        self._checkerrors(filename, fileid, configid, checker, None)
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors
            WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
        return self.cur.fetchone() is not None

663 """Return a dictionary of unit stats mapping sets of unit
664 indices with those states"""
665 stats = emptyfilestats()
666 if extended:
667 stats["extended"] = {}
668
669 fileid = self._getfileid(filename, store=store)
670
671 self.cur.execute("""SELECT state, e_state, unitindex
672 FROM units WHERE fileid=? ORDER BY unitindex;""", (fileid,))
673 values = self.cur.fetchall()
674
675 for value in values:
676 stats[state_strings[value[0]]].append(value[2])
677 if extended:
678 if value[1] not in stats["extended"]:
679 stats["extended"][value[1]] = []
680 stats["extended"][value[1]].append(value[2])
681 stats["total"].append(value[2])
682 return stats
683
    def filestats(self, filename, checker, store=None, extended=False):
        """Return a dictionary of property names mapping sets of unit
        indices with those properties."""
        stats = emptyfilestats()
        stats.update(self.filechecks(filename, checker, store))
        stats.update(self.filestatestats(filename, store, extended=extended))
        return stats

    def unitstats(self, filename, _lang=None, store=None):
        """Return a dictionary of property names mapping to arrays which
        map unit indices to property values.

        Please note that this is different from filestats, since filestats
        supplies sets of unit indices with a given property, whereas this
        method supplies arrays which map unit indices to given values."""
        stats = emptyunitstats()

        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT
            sourcewords, targetwords
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for sourcecount, targetcount in self.cur.fetchall():
            stats["sourcewordcount"].append(sourcecount)
            stats["targetwordcount"].append(targetcount)

        return stats
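

# Illustrative sketch (not part of the original module): pulling the cached
# per-file and per-unit figures together for display.  The path and checker
# are assumed examples; any translate.filters.checks checker should work.
def _example_report(path="project/af.po", checker=None):
    from translate.filters import checks
    cache = StatsCache()
    checker = checker or checks.StandardUnitChecker()
    overview = cache.filestats(path, checker)   # unit indices per property
    wordcounts = cache.unitstats(path)          # per-unit word counts
    return {
        "translated units": len(overview["translated"]),
        "fuzzy units": len(overview["fuzzy"]),
        "untranslated units": len(overview["untranslated"]),
        "source words": sum(wordcounts["sourcewordcount"]),
    }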