1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """functions to get decorative/informative text out of strings..."""
23
24 import re
25 import unicodedata
26 from translate.lang import data
27
def spacestart(str1):
    """Return all the whitespace from the start of the string.

    @type str1: String
    @param str1: The text to inspect
    @rtype: String
    @return: The leading run of characters for which C{isspace()} is true
        (empty if the string does not start with whitespace)
    """
    for index, char in enumerate(str1):
        if not char.isspace():
            # Slice once instead of growing a string character by character.
            return str1[:index]
    # Empty string, or nothing but whitespace.
    return str1
37
def spaceend(str1):
    """Return all the whitespace from the end of the string.

    @type str1: String
    @param str1: The text to inspect
    @rtype: String
    @return: The trailing run of characters for which C{isspace()} is true
        (empty if the string does not end with whitespace)
    """
    for count, char in enumerate(reversed(str1)):
        if not char.isspace():
            # ``count`` trailing characters were whitespace; slice them off
            # in one go rather than prepending one character at a time.
            return str1[len(str1) - count:]
    # Empty string, or nothing but whitespace.
    return str1
48
def puncstart(str1, punctuation):
    """Return all the punctuation from the start of the string.

    Whitespace is treated as part of the punctuation run.

    @type str1: String
    @param str1: The text to inspect
    @type punctuation: String
    @param punctuation: The characters that count as punctuation
    @rtype: String
    @return: The leading run of punctuation and whitespace characters
    """
    for index, char in enumerate(str1):
        if not (char in punctuation or char.isspace()):
            return str1[:index]
    # Empty string, or nothing but punctuation/whitespace.
    return str1
58
def puncend(str1, punctuation):
    """Return all the punctuation from the end of the string.

    Whitespace is treated as part of the punctuation run, and any
    no-break space (U+00A0) in the result is replaced by a plain space.

    @type str1: String
    @param str1: The text to inspect
    @type punctuation: String
    @param punctuation: The characters that count as punctuation
    @rtype: String
    @return: The trailing run of punctuation and whitespace characters,
        with no-break spaces normalised to ordinary spaces
    """
    trailing = str1
    for count, char in enumerate(reversed(str1)):
        if not (char in punctuation or char.isspace()):
            trailing = str1[len(str1) - count:]
            break
    # NOTE(review): presumably the normalisation lets a source ending in
    # "!" style punctuation compare equal to a translation that uses a
    # no-break space before it -- confirm against the callers.
    return trailing.replace(u"\u00a0", u" ")
71
def ispurepunctuation(str1):
    """Check whether the string is entirely punctuation.

    A string counts as pure punctuation when it is non-empty and contains
    no alphanumeric character.

    @type str1: String
    @param str1: The text to inspect
    @return: C{False} if any character is alphanumeric; otherwise the
        length of the string, so the empty string yields a falsy C{0}.
        Callers should rely only on the truthiness of the result.
    """
    if any(char.isalnum() for char in str1):
        return False
    # The length (rather than a bool) is kept for backward compatibility.
    return len(str1)
78
def isvalidaccelerator(accelerator, acceptlist=None):
    """Return whether the given accelerator character is valid.

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as
        accelerators; when C{None} a built-in heuristic is used instead
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # checks below no longer raise NameError on Python 3.
    text_type = type(u"")
    assert isinstance(accelerator, text_type)
    assert isinstance(acceptlist, text_type) or acceptlist is None
    if len(accelerator) == 0:
        return False
    if acceptlist is not None:
        # Membership is tested against the normalised accept list.
        acceptlist = data.normalize(acceptlist)
        return accelerator in acceptlist

    # No accept list supplied: fall back to the heuristic.
    # Underscores are dropped first, so a lone "_" becomes the empty
    # string, which the substring test below accepts.
    accelerator = accelerator.replace("_", "")
    if accelerator in u"-?":
        return True
    if not accelerator.isalnum():
        return False

    # Reject characters whose Unicode decomposition consists of more than
    # one code point (e.g. a base letter plus a combining accent).
    decomposition = unicodedata.decomposition(accelerator)
    # Strip decomposition tags such as "<compat>" before counting.
    decomposition = re.sub("<[^>]+>", "", decomposition).strip()
    return decomposition.count(" ") == 0
112
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Return all the accelerators and locations in str1 marked with a given marker.

    @type str1: String
    @param str1: The text to search
    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @type acceptlist: String
    @param acceptlist: Passed through to L{isvalidaccelerator}
    @rtype: Tuple
    @return: C{(accelerators, badaccelerators)}; each is a list of
        C{(position of the marker, accelerator character)} tuples, split
        by whether L{isvalidaccelerator} accepted the character
    """
    accelerators = []
    badaccelerators = []
    markerlen = len(accelmarker)
    accelstart = str1.find(accelmarker)
    while accelstart >= 0:
        # The accelerator is the single character right after the marker.
        accelend = accelstart + markerlen + 1
        if accelend > len(str1):
            # Marker at the very end of the string: nothing follows it.
            break
        accelerator = str1[accelstart + markerlen:accelend]
        if isvalidaccelerator(accelerator, acceptlist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
        # Resume the search after the character just consumed.
        accelstart = str1.find(accelmarker, accelend)
    return accelerators, badaccelerators
134
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """Return all the variables and locations in str1 marked with a given marker.

    @type str1: String
    @param str1: The text to search
    @type startmarker: String
    @param startmarker: The text that introduces a variable
    @param endmarker: How a variable ends: C{None} means the variable is
        the run of alphanumeric/underscore characters after the start
        marker; an integer is a fixed variable length; a string is a
        closing marker
    @param ignorelist: Variable names that must not be reported
    @rtype: List
    @return: C{(position of the start marker, variable name)} tuples
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            # No end marker: the first character that is neither
            # alphanumeric nor an underscore terminates the variable.
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            else:
                # Ran off the end: the variable extends to the end.
                endmatch = len(str1)
            # A terminator directly after the marker means there is no
            # variable here.  (This case used to be mistaken for "no
            # terminator found", which swallowed the rest of the string
            # and hid any later variables.)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # Fixed-length variable.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # If another start marker occurs before the end marker, use
            # the last one so that the innermost candidate is taken.
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            # Accept empty names and names made of alphanumerics,
            # underscores and dots only.
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
182
def getaccelerators(accelmarker, acceptlist=None):
    """Return a function that gets a list of accelerators marked using accelmarker.

    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @type acceptlist: String
    @param acceptlist: Passed through to L{findaccelerators}
    @return: A function taking a string and returning
        C{(accelerators, badaccelerators)} as two lists of characters
    """
    def getmarkedaccelerators(str1):
        """returns all the accelerators in str1 marked with a given marker"""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        # Only the characters are wanted here; the positions are dropped.
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators
    return getmarkedaccelerators
192
def getvariables(startmarker, endmarker):
    """Return a function that gets a list of variables marked using startmarker and endmarker.

    @type startmarker: String
    @param startmarker: The text that introduces a variable
    @param endmarker: Passed through to L{findmarkedvariables}
    @return: A function taking a string and returning the list of variable
        names found in it
    """
    def getmarkedvariables(str1):
        """returns all the variables in str1 marked with a given marker"""
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        # Only the names are wanted here; the positions are dropped.
        return [variable for _, variable in varlocs]
    return getmarkedvariables
201
def getnumbers(str1):
    """Return any numbers that are in the string.

    A number is a run of digits that may contain periods between digits
    and degree signs (U+00B0).  Periods that are not followed by another
    digit are not included in the number.

    @type str1: String
    @param str1: The (unicode) text to scan
    @rtype: List
    @return: The numbers found, as strings, in order of appearance
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # check below no longer raises NameError on Python 3.
    assert isinstance(str1, type(u""))
    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    # Periods seen since the last digit; they are only committed to the
    # number once another digit follows.
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber and chr1 != '.' and chr1 != degreesign:
            # Anything other than a period or degree sign ends the number.
            innumber = False
            if lastnumber:
                numbers.append(lastnumber)
            lastnumber = ""
        if not innumber:
            carryperiod = ""
        elif chr1 == degreesign:
            lastnumber += chr1
        elif chr1 == '.':
            carryperiod += chr1
        else:
            lastnumber += carryperiod + chr1
            carryperiod = ""
    # Flush a number that runs to the end of the string.
    if innumber and lastnumber:
        numbers.append(lastnumber)
    return numbers
234
240
def getemails(str1):
    """Return the email addresses that are in a string.

    @type str1: String
    @param str1: The text to scan
    @rtype: List
    @return: Every substring of the form C{name@host}, where both parts
        consist of word characters, periods and hyphens
    """
    # Raw string: "\w" in a plain literal is an invalid string escape.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
244
def geturls(str1):
    """Return the URIs in a string.

    Matches text starting with C{http:}, C{https:}, C{www.} or C{ftp:},
    followed by characters from a fixed set of URL characters.

    @type str1: String
    @param str1: The text to scan
    @rtype: List
    @return: The matching substrings, in order of appearance
    """
    # Raw strings: "\w" etc. in a plain literal are invalid string escapes.
    # The pattern text itself is unchanged.
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
250
def countaccelerators(accelmarker, acceptlist=None):
    """Return a function that counts the number of accelerators marked with the given marker.

    @type accelmarker: String
    @param accelmarker: The marker that precedes an accelerator character
    @type acceptlist: String
    @param acceptlist: Passed through to L{findaccelerators}
    @return: A function taking a string and returning
        C{(number of valid accelerators, number of invalid accelerators)}
    """
    def countmarkedaccelerators(str1):
        """returns the number of valid and invalid accelerators in str1 marked with a given marker"""
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        return len(acclocs), len(badlocs)
    return countmarkedaccelerators
258