OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESUtil.cc
Go to the documentation of this file.
1 // BESUtil.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 
38 #if HAVE_UNISTD_H
39 #include <unistd.h>
40 #endif
41 
42 #include <cstdio>
43 #include <cerrno>
44 #include <cstring>
45 #include <cstdlib>
46 #include <sstream>
47 #include <iostream>
48 
49 using std::istringstream ;
50 using std::cout ;
51 using std::endl ;
52 
53 #include "BESUtil.h"
54 #include "BESForbiddenError.h"
55 #include "BESNotFoundError.h"
56 #include "BESInternalError.h"
57 
58 #define CRLF "\r\n"
59 
64 void
65 BESUtil::set_mime_text( ostream &strm )
66 {
67  strm << "HTTP/1.0 200 OK" << CRLF ;
68  strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
69 
70  const time_t t = time(0);
71  strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
72  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
73 
74  strm << "Content-Type: text/plain" << CRLF ;
75  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
76  strm << "Content-Description: unknown" << CRLF ;
77  strm << CRLF ;
78 }
79 
84 void
85 BESUtil::set_mime_html( ostream &strm )
86 {
87  strm << "HTTP/1.0 200 OK" << CRLF ;
88  strm << "XBES-Server: " << PACKAGE_STRING << CRLF ;
89 
90  const time_t t = time(0);
91  strm << "Date: " << rfc822_date(t).c_str() << CRLF ;
92  strm << "Last-Modified: " << rfc822_date(t).c_str() << CRLF ;
93 
94  strm << "Content-type: text/html" << CRLF ;
95  // Note that Content-Description is from RFC 2045 (MIME, pt 1), not 2616.
96  strm << "Content-Description: unknown" << CRLF ;
97  strm << CRLF ;
98 }
99 
100 // Return a MIME rfc-822 date. The grammar for this is:
101 // date-time = [ day "," ] date time ; dd mm yy
102 // ; hh:mm:ss zzz
103 //
104 // day = "Mon" / "Tue" / "Wed" / "Thu"
105 // / "Fri" / "Sat" / "Sun"
106 //
107 // date = 1*2DIGIT month 2DIGIT ; day month year
108 // ; e.g. 20 Jun 82
109 // NB: year is 4 digit; see RFC 1123. 11/30/99 jhrg
110 //
111 // month = "Jan" / "Feb" / "Mar" / "Apr"
112 // / "May" / "Jun" / "Jul" / "Aug"
113 // / "Sep" / "Oct" / "Nov" / "Dec"
114 //
115 // time = hour zone ; ANSI and Military
116 //
117 // hour = 2DIGIT ":" 2DIGIT [":" 2DIGIT]
118 // ; 00:00:00 - 23:59:59
119 //
120 // zone = "UT" / "GMT" ; Universal Time
121 // ; North American : UT
122 // / "EST" / "EDT" ; Eastern: - 5/ - 4
123 // / "CST" / "CDT" ; Central: - 6/ - 5
124 // / "MST" / "MDT" ; Mountain: - 7/ - 6
125 // / "PST" / "PDT" ; Pacific: - 8/ - 7
126 // / 1ALPHA ; Military: Z = UT;
127 // ; A:-1; (J not used)
128 // ; M:-12; N:+1; Y:+12
129 // / ( ("+" / "-") 4DIGIT ) ; Local differential
130 // ; hours+min. (HHMM)
131 
// Abbreviated day and month names for RFC 822 date strings. rfc822_date()
// indexes them with tm_wday and tm_mon respectively.
static const char *days[] = {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
} ;
static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
} ;
135 
145 string
146 BESUtil::rfc822_date(const time_t t)
147 {
148  struct tm *stm = gmtime(&t);
149  char d[256];
150 
151  snprintf(d, 255, "%s, %02d %s %4d %02d:%02d:%02d GMT", days[stm->tm_wday],
152  stm->tm_mday, months[stm->tm_mon],
153  1900 + stm->tm_year,
154  stm->tm_hour, stm->tm_min, stm->tm_sec);
155  d[255] = '\0';
156  return string(d);
157 }
158 
159 string
161 {
162  int val;
163  istringstream ss( s ) ;
164  ss >> std::hex >> val;
165  char tmp_str[2];
166  tmp_str[0] = static_cast<char>(val);
167  tmp_str[1] = '\0';
168  return string(tmp_str);
169 }
170 
171 // I modified this to mirror the version in libdap. The change allows several
172 // escape sequences to by listed in 'except'. jhrg 2/18/09
173 string
174 BESUtil::www2id(const string &in, const string &escape, const string &except)
175 {
176  string::size_type i = 0;
177  string res = in;
178  while ((i = res.find_first_of(escape, i)) != string::npos) {
179  if (except.find(res.substr(i, 3)) != string::npos) {
180  i += 3;
181  continue;
182  }
183  res.replace(i, 3, unhexstring(res.substr(i + 1, 2)));
184  }
185 
186  return res;
187 }
188 
189 string
190 BESUtil::lowercase( const string &s )
191 {
192  string return_string = s ;
193  for( int j = 0; j < static_cast<int>(return_string.length()); j++ )
194  {
195  return_string[j] = (char)tolower( return_string[j] ) ;
196  }
197 
198  return return_string ;
199 }
200 
201 string
202 BESUtil::unescape( const string &s )
203 {
204  bool done = false ;
205  string::size_type index = 0 ;
206  /* string::size_type new_index = 0 ; */
207  string new_str ;
208  while( !done )
209  {
210  string::size_type bs = s.find( '\\', index ) ;
211  if( bs == string::npos )
212  {
213  new_str += s.substr( index, s.length() - index ) ;
214  done = true ;
215  }
216  else
217  {
218  new_str += s.substr( index, bs - index ) ;
219  new_str += s[bs+1] ;
220  index = bs+2 ;
221  }
222  }
223 
224  return new_str ;
225 }
226 
248 void
249 BESUtil::check_path( const string &path,
250  const string &root,
251  bool follow_sym_links )
252 {
253  // if nothing is passed in path, then the path checks out since root is
254  // assumed to be valid.
255  if( path == "" )
256  return ;
257 
258  // make sure there are no ../ in the directory, backing up in any way is
259  // not allowed.
260  string::size_type dotdot = path.find( ".." ) ;
261  if( dotdot != string::npos )
262  {
263  string s = (string)"You are not allowed to access the node " + path;
264  throw BESForbiddenError( s, __FILE__, __LINE__ ) ;
265  }
266 
267  // What I want to do is to take each part of path and check to see if it
268  // is a symbolic link and it is accessible. If everything is ok, add the
269  // next part of the path.
270  bool done = false ;
271 
272  // what is remaining to check
273  string rem = path ;
274  if( rem[0] == '/' )
275  rem = rem.substr( 1, rem.length() - 1 ) ;
276  if( rem[rem.length()-1] == '/' )
277  rem = rem.substr( 0, rem.length() - 1 ) ;
278 
279  // full path of the thing to check
280  string fullpath = root ;
281  if( fullpath[fullpath.length()-1] == '/' )
282  {
283  fullpath = fullpath.substr( 0, fullpath.length() - 1 ) ;
284  }
285 
286  // path checked so far
287  string checked ;
288 
289  while( !done )
290  {
291  size_t slash = rem.find( '/' ) ;
292  if( slash == string::npos )
293  {
294  fullpath = fullpath + "/" + rem ;
295  checked = checked + "/" + rem ;
296  done = true ;
297  }
298  else
299  {
300  fullpath = fullpath + "/" + rem.substr( 0, slash ) ;
301  checked = checked + "/" + rem.substr( 0, slash ) ;
302  rem = rem.substr( slash + 1, rem.length() - slash ) ;
303  }
304 
305  if( !follow_sym_links )
306  {
307  struct stat buf;
308  int statret = lstat( fullpath.c_str(), &buf ) ;
309  if( statret == -1 )
310  {
311  int errsv = errno ;
312  // stat failed, so not accessible. Get the error string,
313  // store in error, and throw exception
314  char *s_err = strerror( errsv ) ;
315  string error = "Unable to access node " + checked + ": " ;
316  if( s_err )
317  {
318  error = error + s_err ;
319  }
320  else
321  {
322  error = error + "unknow access error" ;
323  }
324  // ENOENT means that the node wasn't found. Otherise, access
325  // is denied for some reason
326  if( errsv == ENOENT )
327  {
328  throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
329  }
330  else
331  {
332  throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
333  }
334  }
335  else
336  {
337  // lstat was successful, now check if sym link
338  if( S_ISLNK( buf.st_mode ) )
339  {
340  string error = "You do not have permission to access "
341  + checked ;
342  throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
343  }
344  }
345  }
346  else
347  {
348  // just do a stat and see if we can access the thing. If we
349  // can't, get the error information and throw an exception
350  struct stat buf ;
351  int statret = stat( fullpath.c_str(), &buf ) ;
352  if( statret == -1 )
353  {
354  int errsv = errno ;
355  // stat failed, so not accessible. Get the error string,
356  // store in error, and throw exception
357  char *s_err = strerror( errsv ) ;
358  string error = "Unable to access node " + checked + ": " ;
359  if( s_err )
360  {
361  error = error + s_err ;
362  }
363  else
364  {
365  error = error + "unknow access error" ;
366  }
367  // ENOENT means that the node wasn't found. Otherise, access
368  // is denied for some reason
369  if( errsv == ENOENT )
370  {
371  throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
372  }
373  else
374  {
375  throw BESForbiddenError( error, __FILE__, __LINE__ ) ;
376  }
377  }
378  }
379  }
380 }
381 
382 char *
383 BESUtil::fastpidconverter( char *buf, int base )
384 {
385  return fastpidconverter( getpid(), buf, base ) ;
386 }
387 
388 char *
390  long val, /* value to be converted */
391  char *buf, /* output string */
392  int base) /* conversion base */
393 {
394  ldiv_t r; /* result of val / base */
395 
396  if (base > 36 || base < 2) /* no conversion if wrong base */
397  {
398  *buf = '\0';
399  return buf;
400  }
401  if (val < 0)
402  *buf++ = '-';
403  r = ldiv (labs(val), base);
404 
405  /* output digits of val/base first */
406 
407  if (r.quot > 0)
408  buf = fastpidconverter ( r.quot, buf, base);
409  /* output last digit */
410 
411  *buf++ = "0123456789abcdefghijklmnopqrstuvwxyz"[(int)r.rem];
412  *buf = '\0';
413  return buf;
414 }
415 
416 void
418 {
419  if( !key.empty() )
420  {
421  string::size_type first = key.find_first_not_of( " \t\n\r" ) ;
422  string::size_type last = key.find_last_not_of( " \t\n\r" ) ;
423  if( first == string::npos ) key = "" ;
424  else
425  {
426  string::size_type num = last - first + 1 ;
427  string new_key = key.substr( first, num ) ;
428  key = new_key ;
429  }
430  }
431 }
432 
433 string
434 BESUtil::entity( char c )
435 {
436  switch( c )
437  {
438  case '>': return "&gt;";
439  case '<': return "&lt;";
440  case '&': return "&amp;";
441  case '\'': return "&apos;";
442  case '\"': return "&quot;";
443  default: return string(1,c); // is this proper default, just the char?
444  }
445 }
446 
453 string
454 BESUtil::id2xml( string in, const string &not_allowed )
455 {
456  string::size_type i = 0 ;
457 
458  while( ( i = in.find_first_of( not_allowed, i ) ) != string::npos )
459  {
460  in.replace( i, 1, entity( in[i] ) ) ;
461  i++ ;
462  }
463 
464  return in ;
465 }
466 
472 string
473 BESUtil::xml2id(string in)
474 {
475  string::size_type i = 0;
476 
477  while ((i = in.find("&gt;", i)) != string::npos)
478  in.replace(i, 4, ">");
479 
480  i = 0;
481  while ((i = in.find("&lt;", i)) != string::npos)
482  in.replace(i, 4, "<");
483 
484  i = 0;
485  while ((i = in.find("&amp;", i)) != string::npos)
486  in.replace(i, 5, "&");
487 
488  i = 0;
489  while ((i = in.find("&apos;", i)) != string::npos)
490  in.replace(i, 6, "'");
491 
492  i = 0;
493  while ((i = in.find("&quot;", i)) != string::npos)
494  in.replace(i, 6, "\"");
495 
496  return in;
497 }
498 
512 void
513 BESUtil::explode( char delim, const string &str, list<string> &values )
514 {
515  std::string::size_type start = 0 ;
516  std::string::size_type qstart = 0 ;
517  std::string::size_type adelim = 0 ;
518  std::string::size_type aquote = 0 ;
519  bool done = false ;
520  while( !done )
521  {
522  string aval ;
523  if( str[start] == '"' )
524  {
525  bool endquote = false ;
526  qstart = start+1 ;
527  while( !endquote )
528  {
529  aquote = str.find( '"', qstart ) ;
530  if( aquote == string::npos )
531  {
532  string currval = str.substr( start, str.length() - start ) ;
533  string err = "BESUtil::explode - No end quote after value "
534  + currval ;
535  throw BESInternalError( err, __FILE__, __LINE__ ) ;
536  }
537  // could be an escaped escape character and an escaped
538  // quote, or an escaped escape character and a quote
539  if( str[aquote-1] == '\\' )
540  {
541  if( str[aquote-2] == '\\' )
542  {
543  endquote = true ;
544  qstart = aquote + 1 ;
545  }
546  else
547  {
548  qstart = aquote+1 ;
549  }
550  }
551  else
552  {
553  endquote = true ;
554  qstart = aquote + 1 ;
555  }
556  }
557  if( str[qstart] != delim && qstart != str.length() )
558  {
559  string currval = str.substr( start, qstart - start ) ;
560  string err = "BESUtil::explode - No delim after end quote "
561  + currval ;
562  throw BESInternalError( err, __FILE__, __LINE__ ) ;
563  }
564  if( qstart == str.length() )
565  {
566  adelim = string::npos ;
567  }
568  else
569  {
570  adelim = qstart ;
571  }
572  }
573  else
574  {
575  adelim = str.find( delim, start ) ;
576  }
577  if( adelim == string::npos )
578  {
579  aval = str.substr( start, str.length() - start ) ;
580  done = true ;
581  }
582  else
583  {
584  aval = str.substr( start, adelim - start ) ;
585  }
586  values.push_back( aval ) ;
587  start = adelim + 1 ;
588  if( start == str.length() )
589  {
590  values.push_back( "" ) ;
591  done = true ;
592  }
593  }
594 }
595 
606 string
607 BESUtil::implode( const list<string> &values, char delim )
608 {
609  string result ;
610  list<string>::const_iterator i = values.begin() ;
611  list<string>::const_iterator e = values.end() ;
612  bool first = true ;
613  string::size_type d; // = string::npos ;
614  for( ; i != e; i++ )
615  {
616  if( !first ) result += delim ;
617  d = (*i).find( delim ) ;
618  if( d != string::npos && (*i)[0] != '"' )
619  {
620  string err = (string)"BESUtil::implode - delimiter exists in value "
621  + (*i) ;
622  throw BESInternalError( err, __FILE__, __LINE__ ) ;
623  }
624  //d = string::npos ;
625  result += (*i) ;
626  first = false ;
627  }
628  return result ;
629 }
630 
650 void
651 BESUtil::url_explode( const string &url_str, BESUtil::url &url_parts )
652 {
653  string rest ;
654 
655  string::size_type colon = url_str.find( ":" ) ;
656  if( colon == string::npos )
657  {
658  string err = "BESUtil::url_explode: missing colon for protocol" ;
659  throw BESInternalError( err, __FILE__, __LINE__ ) ;
660  }
661 
662  url_parts.protocol = url_str.substr( 0, colon ) ;
663 
664  if( url_str.substr( colon, 3 ) != "://" )
665  {
666  string err = "BESUtil::url_explode: no :// in the URL" ;
667  throw BESInternalError( err, __FILE__, __LINE__ ) ;
668  }
669 
670  colon += 3 ;
671  rest = url_str.substr( colon ) ;
672 
673  string::size_type slash = rest.find( "/" ) ;
674  if( slash == string::npos ) slash = rest.length() ;
675 
676  string::size_type at = rest.find( "@" ) ;
677  if( ( at != string::npos ) && ( at < slash ) )
678  {
679  // everything before the @ is username:password
680  string up = rest.substr( 0, at ) ;
681  colon = up.find( ":" ) ;
682  if( colon != string::npos )
683  {
684  url_parts.uname = up.substr( 0, colon ) ;
685  url_parts.psswd = up.substr( colon+1 ) ;
686  }
687  else
688  {
689  url_parts.uname = up ;
690  }
691  // everything after the @ is domain/path
692  rest = rest.substr( at+1 ) ;
693  }
694  slash = rest.find( "/" ) ;
695  if( slash == string::npos ) slash = rest.length() ;
696  colon = rest.find( ":" ) ;
697  if( ( colon != string::npos ) && ( colon < slash ) )
698  {
699  // everything before the colon is the domain
700  url_parts.domain = rest.substr( 0, colon ) ;
701  // everything after the folon is port/path
702  rest = rest.substr( colon+1 ) ;
703  slash = rest.find( "/" ) ;
704  if( slash != string::npos )
705  {
706  url_parts.port = rest.substr( 0, slash ) ;
707  url_parts.path = rest.substr( slash+1 ) ;
708  }
709  else
710  {
711  url_parts.port = rest ;
712  url_parts.path = "" ;
713  }
714  }
715  else
716  {
717  slash = rest.find( "/" ) ;
718  if( slash != string::npos )
719  {
720  url_parts.domain = rest.substr( 0, slash ) ;
721  url_parts.path = rest.substr( slash+1 ) ;
722  }
723  else
724  {
725  url_parts.domain = rest ;
726  }
727  }
728 }
729 
730 string
732 {
733  string url = url_parts.protocol + "://" ;
734  if( !url_parts.uname.empty() )
735  {
736  url += url_parts.uname ;
737  if( !url_parts.psswd.empty() ) url += ":" + url_parts.psswd ;
738  url += "@" ;
739  }
740  url += url_parts.domain ;
741  if( !url_parts.port.empty() ) url += ":" + url_parts.port ;
742  if( !url_parts.path.empty() ) url += "/" + url_parts.path ;
743 
744  return url ;
745 }
746 
error thrown if the resource requested cannot be found
static string id2xml(string in, const string &not_allowed="><&'\"")
convert characters not allowed in xml to escaped characters
Definition: BESUtil.cc:454
exception thrown if internal error encountered
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:190
static string www2id(const string &in, const string &escape="%", const string &except="")
These functions are used to unescape hex characters from strings.
Definition: BESUtil.cc:174
static void removeLeadingAndTrailingBlanks(string &key)
remove leading and trailing blanks from a string
Definition: BESUtil.cc:417
static string implode(const list< string > &values, char delim)
implode a list of values into a single string delimited by delim
Definition: BESUtil.cc:607
string port
Definition: BESUtil.h:112
static string xml2id(string in)
unescape xml escaped characters
Definition: BESUtil.cc:473
static void set_mime_html(ostream &strm)
Generate an HTTP 1.0 response header for a html document.
Definition: BESUtil.cc:85
static void explode(char delim, const string &str, list< string > &values)
explode a string into an array given a delimiter
Definition: BESUtil.cc:513
#define CRLF
Definition: BESUtil.cc:58
static void set_mime_text(ostream &strm)
Generate an HTTP 1.0 response header for a text document.
Definition: BESUtil.cc:65
static string unhexstring(string s)
Definition: BESUtil.cc:160
static void url_explode(const string &url_str, BESUtil::url &url_parts)
Given a url, break the url into its different parts.
Definition: BESUtil.cc:651
string path
Definition: BESUtil.h:113
error thrown if the BES is not allowed to access the resource requested
static string url_create(BESUtil::url &url_parts)
Definition: BESUtil.cc:731
string protocol
Definition: BESUtil.h:108
static char * fastpidconverter(char *buf, int base)
convert pid and place in provided buffer
Definition: BESUtil.cc:383
string uname
Definition: BESUtil.h:110
string psswd
Definition: BESUtil.h:111
static string unescape(const string &s)
Unescape characters with backslash before them.
Definition: BESUtil.cc:202
#define PACKAGE_STRING
Definition: config.h:126
static void check_path(const string &path, const string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:249
string domain
Definition: BESUtil.h:109