1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Manage the Trados .txt Translation Memory format
22
23 A Trados file looks like this::
24 <TrU>
25 <CrD>18012000, 13:18:35
26 <CrU>CAROL-ANN
27 <UsC>0
28 <Seg L=EN_GB>Association for Road Safety \endash Conference
29 <Seg L=DE_DE>Tagung der Gesellschaft für Verkehrssicherheit
30 </TrU>
31 <TrU>
32 <CrD>18012000, 13:19:14
33 <CrU>CAROL-ANN
34 <UsC>0
35 <Seg L=EN_GB>Road Safety Education in our Schools
36 <Seg L=DE_DE>Verkehrserziehung an Schulen
37 </TrU>
38 """
39
40 import re
41 import time
42
43 try:
44
45 from BeautifulSoup import BeautifulStoneSoup
46 except ImportError:
47 raise ImportError("BeautifulSoup is not installed. Support for Trados txt is disabled.")
48
49 from translate.storage import base
50
# strftime/strptime pattern: day, month and four-digit year run together,
# then ", " and a 24-hour clock time -- e.g. "18012000, 13:18:35".
51 TRADOS_TIMEFORMAT = "%d%m%Y, %H:%M:%S"
52 """Time format used by Trados .txt"""
53
# Keys are raw unicode literals so the leading backslash of each RTF control
# word is kept verbatim; values are the Unicode characters they stand for.
54 RTF_ESCAPES = {
55 ur"\emdash": u"—",
56 ur"\endash": u"–",
57
58 ur"\emspace": u"\u2003",  # U+2003 EM SPACE
59
60 ur"\enspace": u"\u2002",  # U+2002 EN SPACE
61
62 ur"\bullet": u"•",
63 ur"\lquote": u"‘",
64 ur"\rquote": u"’",
65 ur"\ldblquote": u"“",
66 ur"\rdblquote": u"”",
67 ur"\~": u"\u00a0",  # U+00A0 NO-BREAK SPACE
68 ur"\-": u"\u00ad",  # U+00AD SOFT HYPHEN
69 ur"\_": u"‑",  # non-breaking hyphen (presumably U+2011 -- confirm the literal)
70
71
72
73 }
74 """RTF control to Unicode map
75 U{<http://msdn.microsoft.com/en-us/library/aa140283%28v=office.10%29.aspx#rtfspec_specialchar>}
76 """
77
78
84
85
91
92
94 """Manages the timestamps in the Trados .txt format of DDMMYYYY, hh:mm:ss"""
95
# Start with no time, then accept either a Trados time string or a
# time.struct_time; any other truthy value is ignored and the time stays None.
97 self._time = None
98 if newtime:
99 if isinstance(newtime, basestring):
100 self.timestring = newtime  # parsed via the timestring property setter
101 elif isinstance(newtime, time.struct_time):
102 self.time = newtime  # stored via the time property setter
103
105 """Get the time as a Trados time string, or None when no time is set"""
106 if not self._time:
107 return None
108 else:
109 return time.strftime(TRADOS_TIMEFORMAT, self._time)
110
112 """Set the struct_time object using a Trados time formatted string
113
114 @param timestring: A Trados time string (DDMMYYYY, hh:mm:ss)
115 @type timestring: String
116 """
117 self._time = time.strptime(timestring, TRADOS_TIMEFORMAT)  # a ValueError from strptime on a mismatch is not caught here
118 timestring = property(get_timestring, set_timestring)  # expose the getter/setter pair as one attribute
119
121 """Get the time.struct_time object (None when no time is set)"""
122 return self._time
123
125 """Set the struct_time object
126
127 @param newtime: a new time object
128 @type newtime: time.struct_time
129 """
130 if newtime and isinstance(newtime, time.struct_time):
131 self._time = newtime
132 else:
133 self._time = None  # anything that is not a struct_time clears the stored time
# The attribute is called `time` like the module; the method bodies above
# still reach the module because they look the name up globally when called.
134 time = property(get_time, set_time)
135
141
142
156
157
159
# (pattern, replacement) rewrite rule -- presumably applied to the raw markup
# by the BeautifulStoneSoup-based parser before parsing (TODO confirm).
# An opening tag followed by text and a carriage return, e.g.
# "<CrU>CAROL-ANN\r", is rewritten to "<CrU>CAROL-ANN</CrU>": `tag` is the
# tag name up to the first whitespace or "/", `fulltag` also keeps attributes
# such as " L=EN_GB", and the matched "\r" is dropped by the replacement.
# NOTE(review): the pattern requires "\r" after the content, so a line ending
# in a bare "\n" is not rewritten -- confirm inputs always use CRLF.
160 MARKUP_MASSAGE = [
161 (re.compile('<(?P<fulltag>(?P<tag>[^\s\/]+).*?)>(?P<content>.+)\r'),
162 lambda x: '<%(fulltag)s>%(content)s</%(tag)s>' % x.groupdict()),
163 ]
164
165
167 """A Trados translation memory file"""
# NOTE(review): `_` is neither defined nor imported in the visible code --
# presumably a gettext-style translation hook provided elsewhere; confirm.
168 Name = _("Trados Translation Memory")
169 Mimetypes = ["application/x-trados-tm"]
170 Extensions = ["txt"]
171
173 """Construct a Trados TM, optionally reading in from inputfile."""
174 self.UnitClass = unitclass
175 base.TranslationStore.__init__(self, unitclass=unitclass)
176 self.filename = ''
177 self._encoding = 'iso-8859-1'
178 if inputfile is not None:
179 self.parse(inputfile)  # parse() replaces self.filename when the input has a .name
180
# Accepts either a file-like object (anything with .read) or the raw file
# content; builds one unit per 'tru' element found in the parsed markup.
182 if hasattr(input, 'name'):
183 self.filename = input.name
184 elif not getattr(self, 'filename', ''):
185 self.filename = ''
186 if hasattr(input, "read"):
187 tmsrc = input.read()  # read the whole content in one go
188 input.close()  # side effect: the caller's file object is closed here
189 input = tmsrc
190 self._soup = TradosSoup(input)
# The format writes the tag as <TrU>; the lower-case lookup presumably relies
# on the parser lower-casing tag names -- TODO confirm.
191 for tu in self._soup.findAll('tru'):
192 unit = TradosUnit()
193 unit._soup = TradosSoup(str(tu))  # each unit gets its own soup, re-parsed from the element's text
194 self.addunit(unit)
195
197
198 return self._soup.prettify()
199