1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Manage the OmegaT glossary format
22
23 OmegaT glossary format is used by the
24 U{OmegaT<http://www.omegat.org/en/omegat.html>} computer aided
25 translation tool.
26
27 It is a bilingual base class derived format with L{OmegaTFile}
28 and L{OmegaTUnit} providing file and unit level access.
29
30 Format Implementation
31 =====================
32 The OmegaT glossary format is a simple Tab Separated Value (TSV) file
33 with the columns: source, target, comment.
34
35 The dialect of the TSV files is specified by L{OmegaTDialect}.
36
37 Encoding
38 --------
39 The files are either UTF-8 or encoded using the system default. UTF-8
40 encoded files use the .utf8 extension while system encoded files use
41 the .tab extension.
42 """
43
44 import csv
45 import locale
46 import os.path
47 import sys
48 import time
49 from translate.storage import base
50
51 OMEGAT_FIELDNAMES = ["source", "target", "comment"]
52 """Field names for an OmegaT glossary unit"""
53
54
67 csv.register_dialect("omegat", OmegaTDialect)
68
70 """An OmegaT glossary unit"""
76
78 """Get the dictionary of values for an OmegaT line"""
79 return self._dict
80
82 """Set the dictionary of values for an OmegaT line
83
84 @param newdict: a new dictionary with OmegaT line elements
85 @type newdict: Dict
86 """
87
88 self._dict = newdict
89 dict = property(getdict, setdict)
90
92 if key not in self._dict:
93 return None
94 elif self._dict[key]:
95 return self._dict[key].decode('utf-8')
96 else:
97 return ""
98
100 if newvalue is None:
101 self._dict[key] = None
102 if isinstance(newvalue, unicode):
103 newvalue = newvalue.encode('utf-8')
104 if not key in self._dict or newvalue != self._dict[key]:
105 self._dict[key] = newvalue
106
109
112
114 self._rich_source = None
115 return self._set_field('source', newsource)
116 source = property(getsource, setsource)
117
120
122 self._rich_target = None
123 return self._set_field('target', newtarget)
124 target = property(gettarget, settarget)
125
127 self._dict['target-lang'] = newlang
128 targetlang = property(None, settargetlang)
129
131 return str(self._dict)
132
134 return bool(self._dict.get('target', None))
135
136
138 """An OmegaT glossary file"""
139
140
141 Name = None
142 Mimetypes = ["application/x-omegat-glossary"]
143 Extensions = ["utf8"]
145 """Construct an OmegaT glossary, optionally reading in from inputfile."""
146 self.UnitClass = unitclass
147 base.TranslationStore.__init__(self, unitclass=unitclass)
148 self.filename = ''
149 self.extension = ''
150 self._encoding = self._get_encoding()
151 if inputfile is not None:
152 self.parse(inputfile)
153
156
158 """Parse the given file or file source string"""
159 if hasattr(input, 'name'):
160 self.filename = input.name
161 elif not getattr(self, 'filename', ''):
162 self.filename = ''
163 if hasattr(input, "read"):
164 tmsrc = input.read()
165 input.close()
166 input = tmsrc
167 try:
168 input = input.decode(self._encoding).encode('utf-8')
169 except:
170 raise ValueError("OmegaT files are either UTF-8 encoded or use the default system encoding")
171 lines = csv.DictReader(input.split("\n"), fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
172 for line in lines:
173 newunit = OmegaTUnit()
174 newunit.dict = line
175 self.addunit(newunit)
176
178 output = csv.StringIO()
179 writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
180 unit_count = 0
181 for unit in self.units:
182 if unit.istranslated():
183 unit_count += 1
184 writer.writerow(unit.dict)
185 if unit_count == 0:
186 return ""
187 output.reset()
188 decoded = "".join(output.readlines()).decode('utf-8')
189 try:
190 return decoded.encode(self._encoding)
191 except UnicodeEncodeError:
192 return decoded.encode('utf-8')
193
195 """An OmegaT glossary file in the default system encoding"""
196
197
198 Name = None
199 Mimetypes = ["application/x-omegat-glossary"]
200 Extensions = ["tab"]
201
203 return locale.getdefaultlocale()[1]
204