00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059 #include <ctype.h>
00060
00061 #include <iomanip>
00062 #include <string>
00063 #include <sstream>
00064
00065 #include "GNURegex.h"
00066 #include "Error.h"
00067 #include "InternalErr.h"
00068
00069 #include "debug.h"
00070
00071 using namespace std;
00072
00073 namespace libdap {
00074
00075
00076
00077
00078
/**
 * Format a byte as two hexadecimal digits.
 *
 * @param val The byte value to format.
 * @return The value in base 16, left-padded with '0' to a width of two
 * characters (for example 10 yields "0a").
 */
string
hexstring(unsigned char val)
{
    // Widen first so the stream prints a number rather than a character.
    const unsigned int code = static_cast<unsigned int>(val);

    ostringstream formatter;
    formatter.setf(ios::hex, ios::basefield);
    formatter.fill('0');
    formatter.width(2);
    formatter << code;

    return formatter.str();
}
00088
/**
 * Convert hexadecimal digits to the single character they encode.
 *
 * @param s Text holding a hexadecimal number, for example "20" for a space.
 * @return A one-character string holding the decoded value. The result is
 * empty when the decoded value is zero, because it is built from a
 * NUL-terminated buffer; this includes input with no hexadecimal number.
 */
string
unhexstring(string s)
{
    // Must start from a known value: for empty or all-whitespace input the
    // stream fails before the extraction runs and never writes to val.
    int val = 0;
    istringstream ss(s);
    ss >> hex >> val;

    const char decoded[2] = { static_cast<char>(val), '\0' };
    return string(decoded);
}
00100
/**
 * Format a byte as three octal digits.
 *
 * @param val The byte value to format.
 * @return The value in base 8, left-padded with '0' to a width of three
 * characters (for example 10 yields "012").
 */
string
octstring(unsigned char val)
{
    // Widen first so the stream prints a number rather than a character.
    const unsigned int code = static_cast<unsigned int>(val);

    ostringstream formatter;
    formatter.setf(ios::oct, ios::basefield);
    formatter.fill('0');
    formatter.width(3);
    formatter << code;

    return formatter.str();
}
00110
00111 string
00112 unoctstring(string s)
00113 {
00114 int val;
00115
00116 istringstream ss(s);
00117 ss >> oct >> val;
00118
00119 DBG(cerr << "unoctstring: " << val << endl);
00120
00121 char tmp_str[2];
00122 tmp_str[0] = static_cast<char>(val);
00123 tmp_str[1] = '\0';
00124 return string(tmp_str);
00125 }
00126
00151 string
00152 id2www(string in, const string &allowable)
00153 {
00154 string::size_type i = 0;
00155
00156 while ((i = in.find_first_not_of(allowable, i)) != string::npos) {
00157 in.replace(i, 1, "%" + hexstring(in[i]));
00158 i++;
00159 }
00160
00161 return in;
00162 }
00163
00174 string
00175 id2www_ce(string in, const string &allowable)
00176 {
00177 return id2www(in, allowable);
00178 }
00179
00208 string
00209 www2id(const string &in, const string &escape, const string &except)
00210 {
00211 string::size_type i = 0;
00212 string res = in;
00213 while ((i = res.find_first_of(escape, i)) != string::npos) {
00214 if (res.substr(i, 3) == except) {
00215 i += 3;
00216 continue;
00217 }
00218 res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
00219 }
00220
00221 return res;
00222 }
00223
00224 static string
00225 entity(char c)
00226 {
00227 switch (c) {
00228 case '>': return ">";
00229 case '<': return "<";
00230 case '&': return "&";
00231 case '\'': return "'";
00232 case '\"': return """;
00233 default:
00234 throw InternalErr(__FILE__, __LINE__, "Unrecognized character.");
00235 }
00236 }
00237
00244 string
00245 id2xml(string in, const string ¬_allowed)
00246 {
00247 string::size_type i = 0;
00248
00249 while ((i = in.find_first_of(not_allowed, i)) != string::npos) {
00250 in.replace(i, 1, entity(in[i]));
00251 i++;
00252 }
00253
00254 return in;
00255 }
00256
/**
 * Replace the five predefined XML entities (&gt; &lt; &apos; &quot; &amp;)
 * with the characters they stand for.
 *
 * @param in The text to decode (taken by value and modified locally).
 * @return The decoded text.
 */
string
xml2id(string in)
{
    // "&amp;" is deliberately last and the cursor always moves past each
    // replacement: the '&' produced by decoding "&amp;" must not be read as
    // the start of another entity ("&amp;lt;" decodes to "&lt;", not "<").
    static const char *const entities[] = { "&gt;", "&lt;", "&apos;", "&quot;", "&amp;" };
    static const char *const literals[] = { ">", "<", "'", "\"", "&" };
    const unsigned int count = sizeof(entities) / sizeof(entities[0]);

    for (unsigned int k = 0; k < count; ++k) {
        const string entity_text(entities[k]);
        string::size_type i = 0;
        while ((i = in.find(entity_text, i)) != string::npos) {
            in.replace(i, entity_text.length(), literals[k]);
            ++i;
        }
    }

    return in;
}
00288
/**
 * Replace each '%' and the (up to) two characters after it with a single
 * underscore.
 *
 * @param s The text to rewrite (taken by value and modified locally).
 * @return The rewritten text. A '%' with fewer than two characters after it
 * is replaced together with whatever does follow.
 */
string
esc2underscore(string s)
{
    string::size_type pos = s.find('%');
    while (pos != string::npos) {
        s.replace(pos, 3, "_");
        // The text before and including the new '_' holds no '%'.
        pos = s.find('%', pos + 1);
    }

    return s;
}
00303
00304
00308 string
00309 escattr(string s)
00310 {
00311 const string printable = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~`!@#$%^&*()_-+={[}]|\\:;<,>.?/'\"";
00312 const string ESC = "\\";
00313 const string DOUBLE_ESC = ESC + ESC;
00314 const string QUOTE = "\"";
00315 const string ESCQUOTE = ESC + QUOTE;
00316
00317
00318 string::size_type ind = 0;
00319 while ((ind = s.find_first_not_of(printable, ind)) != s.npos)
00320 s.replace(ind, 1, ESC + octstring(s[ind]));
00321
00322
00323 ind = 0;
00324 while ((ind = s.find(ESC, ind)) != s.npos) {
00325 s.replace(ind, 1, DOUBLE_ESC);
00326 ind += DOUBLE_ESC.length();
00327 }
00328
00329
00330 ind = 0;
00331 while ((ind = s.find(QUOTE, ind)) != s.npos) {
00332 s.replace(ind, 1, ESCQUOTE);
00333 ind += ESCQUOTE.length();
00334 }
00335
00336 return s;
00337 }
00338
00347 string
00348 unescattr(string s)
00349 {
00350 Regex octal("\\\\[0-3][0-7][0-7]");
00351 Regex esc_quote("\\\\\"");
00352 Regex esc_esc("\\\\\\\\");
00353 const string ESC = "\\";
00354 const string QUOTE = "\"";
00355 int matchlen;
00356 unsigned int index;
00357
00358 DBG(cerr << "0XX" << s << "XXX" << endl);
00359
00360 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00361 while (index < s.length()) {
00362 DBG(cerr << "1aXX" << s << "XXX index: " << index << endl);
00363 s.replace(index, 2, ESC);
00364 DBG(cerr << "1bXX" << s << "XXX index: " << index << endl);
00365 index = esc_esc.search(s.c_str(), s.length(), matchlen, 0);
00366 }
00367
00368
00369 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00370 while (index < s.length()) {
00371 s.replace(index, 2, QUOTE);
00372 DBG(cerr << "2XX" << s << "XXX index: " << index << endl);
00373 index = esc_quote.search(s.c_str(), s.length(), matchlen, 0);
00374 }
00375
00376
00377 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00378 while (index < s.length()) {
00379 s.replace(index, 4, unoctstring(s.substr(index + 1, 3)));
00380 DBG(cerr << "3XX" << s << "XXX index: " << index << endl);
00381 index = octal.search(s.c_str(), s.length(), matchlen, 0);
00382 }
00383
00384 DBG(cerr << "4XX" << s << "XXX" << endl);
00385 return s;
00386 }
00387
/**
 * Wrap a message in double quotes and backslash-escape any unescaped double
 * quote inside it.
 *
 * A leading quote is added unless the message already starts with one, and
 * a trailing quote is added unless the message (of at least two characters)
 * already ends with one. An interior quote counts as already escaped when
 * the character before it is a backslash.
 *
 * @param msg The message text; may be empty.
 * @return The quoted message. Empty input yields a pair of quotes.
 */
string
munge_error_message(string msg)
{
    // Check for emptiness before looking at the first or last character;
    // a lone '"' is an opening quote that still needs its closing partner.
    if (msg.empty() || msg[0] != '"')
        msg.insert(msg.begin(), '"');
    if (msg.length() < 2 || msg[msg.length() - 1] != '"')
        msg += "\"";

    // Visit only the characters strictly between the enclosing quotes.
    for (string::size_type i = 1; i + 1 < msg.length(); ++i) {
        if (msg[i] == '"' && msg[i - 1] != '\\') {
            msg.insert(i, 1, '\\');
            ++i;    // now on the quote itself; the loop increment moves past it
        }
    }

    return msg;
}
00405
00406 }
00407