1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module to provide a cache of statistics in a database.
23
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """
28
29 try:
30 from sqlite3 import dbapi2
31 except ImportError:
32 from pysqlite2 import dbapi2
33 import os.path
34 import re
35 import sys
36 import stat
37 import thread
38 from UserDict import UserDict
39
40 from translate import __version__ as toolkitversion
41 from translate.lang.common import Common
42 from translate.misc.multistring import multistring
43 from translate.storage import factory
44 from translate.storage.workflow import StateEnum
45
46
# Regular expressions are written as raw strings so that the backslash
# sequences reach the regex engine untouched instead of relying on Python
# leaving unknown string escapes alone.

# Matches an HTML line break: <br>, <br/> or <br />.
brtagre = re.compile(r"<br\s*?/?>")
# Matches any single XML/HTML tag.
xmltagre = re.compile(r"<[^>]+>")
# Matches a full stop that has a non-digit character on both sides.
numberre = re.compile(r"\D\.\D")
50
# Names of the extended (workflow) states, keyed by their StateEnum value.
extended_state_strings = {
    StateEnum.EMPTY: "empty",
    StateEnum.NEEDS_WORK: "needs-work",
    StateEnum.REJECTED: "rejected",
    StateEnum.NEEDS_REVIEW: "needs-review",
    StateEnum.UNREVIEWED: "unreviewed",
    StateEnum.FINAL: "final",
}

# The three basic states are aliases of workflow states.
UNTRANSLATED = StateEnum.EMPTY
FUZZY = StateEnum.NEEDS_WORK
TRANSLATED = StateEnum.UNREVIEWED

# Names of the three basic states, keyed by the aliases above.
state_strings = {
    UNTRANSLATED: "untranslated",
    FUZZY: "fuzzy",
    TRANSLATED: "translated",
}
80
83 """Counts the words in the unit's source and target, taking plurals into
84 account. The target words are only counted if the unit is translated."""
85 (sourcewords, targetwords) = (0, 0)
86 if isinstance(unit.source, multistring):
87 sourcestrings = unit.source.strings
88 else:
89 sourcestrings = [unit.source or ""]
90 for s in sourcestrings:
91 sourcewords += wordcount(s)
92 if not unit.istranslated():
93 return sourcewords, targetwords
94 if isinstance(unit.target, multistring):
95 targetstrings = unit.target.strings
96 else:
97 targetstrings = [unit.target or ""]
98 for s in targetstrings:
99 targetwords += wordcount(s)
100 return sourcewords, targetwords
101
104
105 - def __init__(self, record_keys, record_values=None, compute_derived_values=lambda x: x):
112
114 return tuple(self[key] for key in self.record_keys)
115
122
129
132
135 """Modifies f to commit database changes if it executes without exceptions.
136 Otherwise it rolls back the database.
137
138 ALL publicly accessible methods in StatsCache MUST be decorated with this
139 decorator.
140 """
141
142 def decorated_f(self, *args, **kwargs):
143 try:
144 result = f(self, *args, **kwargs)
145 self.con.commit()
146 return result
147 except:
148
149
150
151 if self.con:
152 self.con.rollback()
153 raise
154 return decorated_f
155
164
167 keys = ['translatedsourcewords',
168 'fuzzysourcewords',
169 'untranslatedsourcewords',
170 'translated',
171 'fuzzy',
172 'untranslated',
173 'translatedtargetwords']
174
177
179 self.cur = cur
180 self.cur.execute("""
181 CREATE TABLE IF NOT EXISTS filetotals(
182 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
183 translatedsourcewords INTEGER NOT NULL,
184 fuzzysourcewords INTEGER NOT NULL,
185 untranslatedsourcewords INTEGER NOT NULL,
186 translated INTEGER NOT NULL,
187 fuzzy INTEGER NOT NULL,
188 untranslated INTEGER NOT NULL,
189 translatedtargetwords INTEGER NOT NULL);""")
190
@classmethod
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Create a totals record, optionally seeded with the data of one unit.

    With no arguments the record is returned exactly as Record builds it
    from the class keys. When state_for_db is given, the counter belonging
    to that state is set to 1 and the matching word counts are filled in:

      - UNTRANSLATED: 'untranslated' and 'untranslatedsourcewords'
      - TRANSLATED: 'translated', 'translatedsourcewords' and
        'translatedtargetwords'
      - FUZZY: 'fuzzy' and 'fuzzysourcewords'

    Any other state leaves the record untouched.
    """
    record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
    if state_for_db is None:
        return record

    # Compare by value, not identity: the state may be a plain integer that
    # was read back from the database, and identity of equal integers is an
    # implementation detail of the interpreter.
    if state_for_db == UNTRANSLATED:
        record['untranslated'] = 1
        record['untranslatedsourcewords'] = sourcewords
    elif state_for_db == TRANSLATED:
        record['translated'] = 1
        record['translatedsourcewords'] = sourcewords
        record['translatedtargetwords'] = targetwords
    elif state_for_db == FUZZY:
        record['fuzzy'] = 1
        record['fuzzysourcewords'] = sourcewords
    return record
207
209 record["total"] = record["untranslated"] + \
210 record["translated"] + \
211 record["fuzzy"]
212 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
213 record["translatedsourcewords"] + \
214 record["fuzzysourcewords"]
215 record["review"] = 0
216 _compute_derived_values = classmethod(_compute_derived_values)
217
224
226 self.cur.execute("""
227 INSERT OR REPLACE into filetotals
228 VALUES (%(fileid)d, %(vals)s);
229 """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
230
232 self.cur.execute("""
233 DELETE FROM filetotals
234 WHERE fileid=?;
235 """, (fileid,))
236
239 """Returns a dictionary with all statistics initialised to 0."""
240 return FileTotals.new_record()
241
245
248 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
249
252 return {"sourcewordcount": [], "targetwordcount": []}
253
254
255
256
257
258
259
260
261
262
def get_mod_info(file_path):
    """Return the (st_mtime, st_size) pair of the file at file_path.

    The path must not refer to a directory; this is checked with an
    assertion.
    """
    info = os.stat(file_path)
    assert not stat.S_ISDIR(info.st_mode)
    return (info.st_mtime, info.st_size)
267
270 return os.path.extsep + 'pending'
271
275
279 """An object instantiated as a singleton for each statsfile that provides
280 access to the database cache from a pool of StatsCache objects."""
281 _caches = {}
282 defaultfile = None
283 con = None
284 """This cache's connection"""
285 cur = None
286 """The current cursor"""
287
289 current_thread = thread.get_ident()
290
291 def make_database(statsfile):
292
293 def connect(cache):
294 cache.con = dbapi2.connect(statsfile)
295 cache.cur = cache.con.cursor()
296
297 def clear_old_data(cache):
298 try:
299 cache.cur.execute("""SELECT min(toolkitbuild) FROM files""")
300 val = cache.cur.fetchone()
301
302
303 if val is None or val[0] < toolkitversion.build:
304 cache.con.close()
305 del cache
306 os.unlink(statsfile)
307 return True
308 return False
309 except dbapi2.OperationalError:
310 return False
311
312 cache = cls._caches.setdefault(current_thread, {})[statsfile] = object.__new__(cls)
313 connect(cache)
314 if clear_old_data(cache):
315 connect(cache)
316 cache.create()
317 return cache
318
319 if not statsfile:
320 if not cls.defaultfile:
321 userdir = os.path.expanduser("~")
322 cachedir = None
323 if os.name == "nt":
324 cachedir = os.path.join(userdir, "Translate Toolkit")
325 else:
326 cachedir = os.path.join(userdir, ".translate_toolkit")
327 if not os.path.exists(cachedir):
328 os.mkdir(cachedir)
329 cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
330 statsfile = cls.defaultfile
331 else:
332 statsfile = os.path.realpath(statsfile)
333
334 if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
335 return cls._caches[current_thread][statsfile]
336
337 return make_database(statsfile)
338
339 @transaction
341 """Create all tables and indexes."""
342 self.file_totals = FileTotals(self.cur)
343
344 self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
345 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
346 path VARCHAR NOT NULL UNIQUE,
347 st_mtime INTEGER NOT NULL,
348 st_size INTEGER NOT NULL,
349 toolkitbuild INTEGER NOT NULL);""")
350
351 self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
352 ON files (path);""")
353
354 self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
355 id INTEGER PRIMARY KEY AUTOINCREMENT,
356 unitid VARCHAR NOT NULL,
357 fileid INTEGER NOT NULL,
358 unitindex INTEGER NOT NULL,
359 source VARCHAR NOT NULL,
360 target VARCHAR,
361 state INTEGER,
362 e_state INTEGER,
363 sourcewords INTEGER,
364 targetwords INTEGER);""")
365
366 self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
367 ON units(fileid);""")
368
369 self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
370 configid INTEGER PRIMARY KEY AUTOINCREMENT,
371 config VARCHAR);""")
372
373 self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
374 ON checkerconfigs(config);""")
375
376 self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
377 errorid INTEGER PRIMARY KEY AUTOINCREMENT,
378 unitindex INTEGER NOT NULL,
379 fileid INTEGER NOT NULL,
380 configid INTEGER NOT NULL,
381 name VARCHAR NOT NULL,
382 message VARCHAR);""")
383
384 self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
385 ON uniterrors(fileid, configid);""")
386
387 @transaction
def _getfileid(self, filename, check_mod_info=True, store=None):
    """Return the fileid that represents the given file in the stats cache.

    If the file is not in the cache yet, or its recorded modification info
    no longer matches the file on disk, the statistics are recalculated.

    When check_mod_info is false and the file is already cached, the
    recorded modification info is simply refreshed and the existing fileid
    is returned without recalculating anything.

    The optional argument store can be used to avoid unnecessary reparsing
    of already loaded translation files. It can be a TranslationFile object
    or a callback that returns one.
    """
    if isinstance(filename, str):
        filename = unicode(filename, sys.getfilesystemencoding())
    realpath = os.path.realpath(filename)

    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
        WHERE path=?;""", (realpath,))
    cached_row = self.cur.fetchone()
    mod_info = get_mod_info(realpath)

    if cached_row:
        fileid, cached_mtime, cached_size = cached_row
        if not check_mod_info:
            # The caller vouches for the cached data: only record the
            # current modification info.
            self.cur.execute("""UPDATE files
                SET st_mtime=?, st_size=?
                WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
            return fileid
        if (cached_mtime, cached_size) == mod_info:
            return fileid

    # Unknown or outdated file: obtain a store and (re)build its statistics.
    if not callable(store):
        store = store or factory.getobject(realpath)
    else:
        store = store()

    return self._cachestore(store, realpath, mod_info)
424
426 """See if this checker configuration has been used before."""
427 config = str(checker.config.__dict__)
428 self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
429 config=?;""", (config,))
430 configrow = self.cur.fetchone()
431 if not configrow or configrow[1] != config:
432 return None
433 else:
434 return configrow[0]
435
436 @transaction
438 """Cache the statistics for the supplied unit(s)."""
439 unitvalues = []
440 for index, unit in enumerate(units):
441 if unit.istranslatable():
442 sourcewords, targetwords = wordsinunit(unit)
443 if unitindex:
444 index = unitindex
445
446 unit_state_for_db = statefordb(unit)
447 unitvalues.append((unit.getid(), fileid, index, \
448 unit.source, unit.target, \
449 sourcewords, targetwords, \
450 unit_state_for_db,
451 unit.get_state_id()))
452 file_totals_record = file_totals_record + FileTotals.new_record(unit_state_for_db, sourcewords, targetwords)
453
454 self.cur.executemany("""INSERT INTO units
455 (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state, e_state)
456 values (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
457 unitvalues)
458 self.file_totals[fileid] = file_totals_record
459 if unitindex:
460 return state_strings[statefordb(units[0])]
461 return ""
462
463 @transaction
465 """Calculates and caches the statistics of the given store
466 unconditionally."""
467 self.cur.execute("""DELETE FROM files WHERE
468 path=?;""", (realpath,))
469 self.cur.execute("""INSERT INTO files
470 (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
471 (realpath, mod_info[0], mod_info[1], toolkitversion.build))
472 fileid = self.cur.lastrowid
473 self.cur.execute("""DELETE FROM units WHERE
474 fileid=?""", (fileid,))
475 self._cacheunitstats(store.units, fileid)
476 return fileid
477
479 stats = {}
480 fileid = self._getfileid(filename, store=store)
481
482 self.cur.execute("""SELECT e_state, COUNT(id), SUM(sourcewords), SUM(targetwords)
483 FROM units WHERE fileid=? GROUP BY e_state""", (fileid,))
484 values = self.cur.fetchall()
485
486 for value in values:
487 stats[extended_state_strings[value[0]]] = {
488 "units": value[1],
489 "sourcewords": value[2],
490 "targetwords": value[3],
491 }
492 return stats
493
def filetotals(self, filename, store=None, extended=False):
    """Return the totals recorded for the given file.

    The fileid is resolved through _getfileid(), which takes care of
    caching the file first when that is needed. When extended is true the
    result of file_extended_totals() is added under the "extended" key.
    """
    totals = self.file_totals[self._getfileid(filename, store=store)]
    if not extended:
        return totals
    totals["extended"] = self.file_extended_totals(filename, store=store)
    return totals
501
502 @transaction
504 """Helper method for cachestorechecks() and recacheunit()"""
505
506
507 dummy = (-1, fileid, configid, "noerror", "")
508 unitvalues = [dummy]
509
510 errornames = []
511 for index, unit in enumerate(units):
512 if unit.istranslatable():
513
514 if unitindex:
515 index = unitindex
516 failures = checker.run_filters(unit)
517 for checkname, checkmessage in failures.iteritems():
518 unitvalues.append((index, fileid, configid, checkname, checkmessage))
519 errornames.append("check-" + checkname)
520 checker.setsuggestionstore(None)
521
522 if unitindex:
523
524
525 unitvalues.remove(dummy)
526 errornames.append("total")
527
528
529 self.cur.executemany("""INSERT INTO uniterrors
530 (unitindex, fileid, configid, name, message)
531 values (?, ?, ?, ?, ?);""",
532 unitvalues)
533 return errornames
534
535 @transaction
537 """Calculates and caches the error statistics of the given store
538 unconditionally."""
539
540
541 self.cur.execute("""DELETE FROM uniterrors WHERE
542 fileid=?;""", (fileid,))
543 self._cacheunitschecks(store.units, fileid, configid, checker)
544 return fileid
545
547 values = self.cur.execute("""
548 SELECT state, sourcewords, targetwords
549 FROM units
550 WHERE fileid=? AND unitid=?
551 """, (fileid, unitid))
552 result = values.fetchone()
553 if result is not None:
554 return result
555 else:
556 print >> sys.stderr, """WARNING: Database in inconsistent state.
557 fileid %d and unitid %s have no entries in the table units.""" % (fileid, unitid)
558
559
560
561 return []
562
563 @transaction
565 """Recalculate all information for a specific unit. This is necessary
566 for updating all statistics when a translation of a unit took place,
567 for example.
568
569 This method assumes that everything was up to date before (file totals,
570 checks, checker config, etc.)"""
571 fileid = self._getfileid(filename, check_mod_info=False)
572 configid = self._get_config_id(fileid, checker)
573 unitid = unit.getid()
574
575 totals_without_unit = self.file_totals[fileid] - \
576 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
577 self.cur.execute("""SELECT unitindex FROM units WHERE
578 fileid=? AND unitid=?;""", (fileid, unitid))
579 unitindex = self.cur.fetchone()[0]
580 self.cur.execute("""DELETE FROM units WHERE
581 fileid=? AND unitid=?;""", (fileid, unitid))
582 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
583
584 self.cur.execute("""DELETE FROM uniterrors WHERE
585 fileid=? AND unitindex=?;""", (fileid, unitindex))
586 if os.path.exists(suggestion_filename(filename)):
587 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
588 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
589 return state
590
def _checkerrors(self, filename, fileid, configid, checker, store):
    """Return the first cached error row for the file and the live cursor.

    The uniterrors table is queried for the given fileid/configid pair. If
    a row is found it is returned together with the cursor, from which the
    caller can fetch the remaining rows. If nothing is cached yet, the
    checks are run over the store (loaded from filename when no store is
    supplied) and the query is repeated.

    store can be a store object, a callback returning one, or None.
    """

    def first_error_row():
        # Run the lookup and hand back the first row plus the cursor that
        # still holds the rest of the result set.
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors WHERE fileid=? and configid=?
            ORDER BY unitindex;""", (fileid, configid))
        return self.cur.fetchone(), self.cur

    row, cursor = first_error_row()
    if row is not None:
        return row, cursor

    # Nothing cached for this file and checker configuration yet, so the
    # checks have to be run now.
    if not callable(store):
        store = store or factory.getobject(filename)
    else:
        store = store()

    if os.path.exists(suggestion_filename(filename)):
        checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
    self._cachestorechecks(fileid, store, checker, configid)
    return first_error_row()
616
617 - def _geterrors(self, filename, fileid, configid, checker, store):
618 result = []
619 first, cur = self._checkerrors(filename, fileid, configid, checker, store)
620 result.append(first)
621 result.extend(cur.fetchall())
622 return result
623
624 @transaction
626 configid = self._getstoredcheckerconfig(checker)
627 if configid:
628 return configid
629 self.cur.execute("""INSERT INTO checkerconfigs
630 (configid, config) values (NULL, ?);""",
631 (str(checker.config.__dict__),))
632 return self.cur.lastrowid
633
def filechecks(self, filename, checker, store=None):
    """Return the check failures of the given file, grouped by check name.

    The rows come from the cache where possible; otherwise the checks are
    run and cached first. The result maps 'check-<name>' to the list of
    unit indices that failed that check.
    """
    fileid = self._getfileid(filename, store=store)
    configid = self._get_config_id(fileid, checker)
    rows = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for checkname, unitindex in rows:
        # Rows with unit index -1 do not refer to a real unit.
        if unitindex != -1:
            key = 'check-' + checkname
            if key not in errors:
                errors[key] = []
            errors[key].append(unitindex)

    return errors
651
653 fileid = self._getfileid(filename)
654 configid = self._get_config_id(fileid, checker)
655 self._checkerrors(filename, fileid, configid, checker, None)
656 self.cur.execute("""SELECT
657 name,
658 unitindex
659 FROM uniterrors
660 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
661 return self.cur.fetchone() is not None
662
664 """Return a dictionary of unit stats mapping sets of unit
665 indices with those states"""
666 stats = emptyfilestats()
667 if extended:
668 stats["extended"] = {}
669
670 fileid = self._getfileid(filename, store=store)
671
672 self.cur.execute("""SELECT state, e_state, unitindex
673 FROM units WHERE fileid=? ORDER BY unitindex;""", (fileid,))
674 values = self.cur.fetchall()
675
676 for value in values:
677 stats[state_strings[value[0]]].append(value[2])
678 if extended:
679 if value[1] not in stats["extended"]:
680 stats["extended"][value[1]] = []
681 stats["extended"][value[1]].append(value[2])
682 stats["total"].append(value[2])
683 return stats
684
685 - def filestats(self, filename, checker, store=None, extended=False):
692
def unitstats(self, filename, _lang=None, store=None):
    """Return a dictionary of property names mapping to lists of values.

    Each list holds one value per cached unit of the file, ordered by unit
    index: "sourcewordcount" holds the source word counts and
    "targetwordcount" the target word counts.

    Please note that this is different from filestats, since filestats
    supplies sets of unit indices with a given property, whereas this
    method supplies lists of property values ordered by unit index.
    """
    stats = emptyunitstats()

    fileid = self._getfileid(filename, store=store)

    self.cur.execute("""SELECT
        sourcewords, targetwords
        FROM units WHERE fileid=?
        ORDER BY unitindex;""", (fileid,))
    rows = self.cur.fetchall()

    stats["sourcewordcount"].extend([source for source, _target in rows])
    stats["targetwordcount"].extend([target for _source, target in rows])

    return stats
718