libdap++  Updated for version 3.11.7
util.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 // (c) COPYRIGHT URI/MIT 1994-1999
27 // Please read the full copyright statement in the file COPYRIGHT_URI.
28 //
29 // Authors:
30 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu>
31 
32 // Utility functions used by the api.
33 //
34 // jhrg 9/21/94
35 
36 #include "config.h"
37 
// Version-control identification string for this file (an expanded "$Id$"
// keyword). NOTE(review): `not_used` is not defined in this file; presumably
// it is an attribute macro from config.h that silences unused-variable
// warnings -- confirm.
38 static char rcsid[] not_used =
39  {"$Id: util.cc 26028 2012-11-07 20:34:13Z jimg $"
40  };
41 
42 #include <fstream>
43 
44 #include <cassert>
45 #include <cstring>
46 
47 #include <ctype.h>
48 #ifndef TM_IN_SYS_TIME
49 #include <time.h>
50 #else
51 #include <sys/time.h>
52 #endif
53 
54 #ifndef WIN32
55 #include <unistd.h> // for stat
56 #else
57 #include <io.h>
58 #include <fcntl.h>
59 #include <process.h>
60 #endif
61 
62 #include <sys/types.h>
63 #include <sys/stat.h>
64 
65 #include <string>
66 #include <sstream>
67 #include <vector>
68 #include <algorithm>
69 #include <stdexcept>
70 
71 #include "BaseType.h"
72 #include "Str.h"
73 #include "Url.h"
74 #include "Sequence.h"
75 #include "Error.h"
76 #include "parser.h"
77 #include "util.h"
78 #include "GNURegex.h"
79 #include "debug.h"
80 
81 using namespace std;
82 
83 namespace libdap {
84 
85 // Remove spaces from the start of a URL and from the start of any constraint
86 // expression it contains. 4/7/98 jhrg
87 
/**
 * Remove leading spaces from a URL and from the constraint expression that
 * follows the first `?' in it.
 *
 * Only the space character (' ') is treated as white space. Spaces that
 * appear elsewhere in the URL are left alone.
 *
 * @param name The URL, possibly followed by `?' and a constraint expression.
 * @return A copy of the URL without the leading spaces. A string that
 * consists only of spaces yields the empty string.
 */
std::string
prune_spaces(const std::string &name)
{
    // If the URL does not even have white space, return it unchanged.
    if (name.find(' ') == std::string::npos)
        return name;

    // Strip leading spaces from http://...
    // Keep the position in size_type: narrowing npos to unsigned int used to
    // make substr() throw for an all-space input.
    const std::string::size_type first = name.find_first_not_of(' ');
    if (first == std::string::npos)
        return std::string();

    std::string pruned = name.substr(first);

    // Strip leading spaces from the constraint part (following `?').
    const std::string::size_type query = pruned.find('?');
    if (query != std::string::npos) {
        const std::string::size_type ce_start = query + 1;
        const std::string::size_type ce_first = pruned.find_first_not_of(' ', ce_start);
        if (ce_first == std::string::npos)
            pruned.erase(ce_start); // nothing but spaces after the `?'
        else
            pruned.erase(ce_start, ce_first - ce_start);
    }

    return pruned;
}
115 
116 // Compare elements in a list of (BaseType *)s and return true if there are
117 // no duplicate elements, otherwise return false.
118 
119 bool
120 unique_names(vector<BaseType *> l, const string &var_name,
121  const string &type_name, string &msg)
122 {
123  // copy the identifier names to a vector
124  vector<string> names(l.size());
125 
126  int nelem = 0;
127  typedef std::vector<BaseType *>::const_iterator citer ;
128  for (citer i = l.begin(); i != l.end(); i++) {
129  assert(*i);
130  names[nelem++] = (*i)->name();
131  DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl);
132  }
133 
134  // sort the array of names
135  sort(names.begin(), names.end());
136 
137 #ifdef DODS_DEBUG2
138  cout << "unique:" << endl;
139  for (int ii = 0; ii < nelem; ++ii)
140  cout << "NAMES[" << ii << "]=" << names[ii] << endl;
141 #endif
142 
143  // sort the array of names
144  sort(names.begin(), names.end());
145 
146 #ifdef DODS_DEBUG2
147  cout << "unique:" << endl;
148  for (int ii = 0; ii < nelem; ++ii)
149  cout << "NAMES[" << ii << "]=" << names[ii] << endl;
150 #endif
151 
152  // look for any instance of consecutive names that are ==
153  for (int j = 1; j < nelem; ++j) {
154  if (names[j-1] == names[j]) {
155  ostringstream oss;
156  oss << "The variable `" << names[j]
157  << "' is used more than once in " << type_name << " `"
158  << var_name << "'";
159  msg = oss.str();
160 
161  return false;
162  }
163  }
164 
165  return true;
166 }
167 
// Returns the value of the LIBDAP_ROOT macro, which is not defined in this
// file.
// NOTE(review): the declarator line (listing line 169) is missing from this
// listing. Other code in this file calls libdap_root(), which is presumably
// this function -- confirm against the original source.
168 const char *
170 {
171  return LIBDAP_ROOT;
172 }
173 
// Returns the value of the PACKAGE_VERSION macro; declared with C linkage.
// NOTE(review): the declarator line (listing line 176) is missing from this
// listing. Other code in this file calls libdap_version(), which is
// presumably this function -- confirm against the original source.
174 extern "C"
175  const char *
177 {
178  return PACKAGE_VERSION;
179 }
180 
// Returns the value of the PACKAGE_NAME macro; declared with C linkage.
// NOTE(review): the declarator line (listing line 183) is missing from this
// listing, so the function's name is not visible here -- restore it from the
// original source.
181 extern "C"
182  const char *
184 {
185  return PACKAGE_NAME;
186 }
187 
188 // Since Server4 can get compressed responses using Tomcat, bail on this
189 // software (which complicates building under Win32). It can be turned on
190 // for use with Server3 in configure.ac.
191 
192 #if COMPRESSION_FOR_SERVER3
193 
194 // Return true if the program deflate exists and is executable by user, group
195 // and world. If this returns false the caller should assume that server
196 // filter programs won't be able to find the deflate program and thus won't
197 // be able to compress the return document.
198 // NB: this works because this function uses the same rules as compressor()
199 // (which follows) to look for deflate. 2/11/98 jhrg
200 
201 bool
202 deflate_exists()
203 {
204  DBG(cerr << "Entering deflate_exists...");
205 
206  int status = false;
207  struct stat buf;
208 
209 #ifdef WIN32
210  string deflate = (string)libdap_root() + "\\bin\\deflate";
211 #else
212  string deflate = (string)libdap_root() + "/sbin/deflate";
213 #endif
214 
215  // Check that the file exists...
216  // First look for deflate using DODS_ROOT (compile-time constant subsumed
217  // by an environment variable) and if that fails in the CWD which finds
218  // the program when it is in the same directory as the dispatch script
219  // and other server components. 2/11/98 jhrg
220  status = (stat(deflate.c_str(), &buf) == 0)
221 #ifdef WIN32
222  || (stat(".\\deflate", &buf) == 0);
223 #else
224  || (stat("./deflate", &buf) == 0);
225 #endif
226 
227  // and that it can be executed.
228 #ifdef WIN32
229  status &= (buf.st_mode & _S_IEXEC);
230 #else
231  status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH);
232 #endif
233  DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
234  return (status != 0);
235 }
236 
// Start a `deflate' child process that reads from a pipe and writes to
// `output', and return a FILE * opened for writing on the pipe.
//
// @param output Stream that becomes the child's standard output.
// @param childpid Value-result parameter: set to the child's process id when
// the child was started.
// @return An unbuffered FILE * for the write end of the pipe, or NULL when
// the pipe, the fork/spawn or (WIN32) one of the handle duplications failed.
//
// NOTE(review): the early `return NULL' paths leave the pipe descriptors (and
// on WIN32 the saved stdin/stdout handles) open, and the result of fdopen()
// is passed to setbuf() without a NULL check.
237 FILE *
238 compressor(FILE *output, int &childpid)
239 {
240 #ifdef WIN32
241  // There is no such thing as a "fork" under win32. This makes it so that
242  // we have to juggle handles more aggressively. This code hasn't been
243  // tested and shown to work as of 07/2000.
244  int pid, data[2];
245  int hStdIn, hStdOut;
246 
247  if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
248  cerr << "Could not create IPC channel for compressor process"
249  << endl;
250  return NULL;
251  }
252 
253 
254  // This sets up for the child process, but it has to be reversed for the
255  // parent after the spawn takes place.
256 
257  // Store stdin, stdout so we have something to restore to
258  hStdIn = _dup(_fileno(stdin));
259  hStdOut = _dup(_fileno(stdout));
260 
261  // Child is to read from read end of pipe
262  if (_dup2(data[0], _fileno(stdin)) != 0) {
263  cerr << "dup of child stdin failed" << endl;
264  return NULL;
265  }
266  // Child is to write its stdout to the file `output'
267  if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
268  cerr << "dup of child stdout failed" << endl;
269  return NULL;
270  }
271 
272  // Spawn child process
273  string deflate = "deflate.exe";
274  if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
275  "-c", "5", "-s", NULL)) < 0) {
276  cerr << "Could not spawn to create compressor process" << endl;
277  return NULL;
278  }
279 
280  // Restore stdin, stdout for parent and close duplicate copies
281  if (_dup2(hStdIn, _fileno(stdin)) != 0) {
282  cerr << "dup of stdin failed" << endl;
283  return NULL;
284  }
285  if (_dup2(hStdOut, _fileno(stdout)) != 0) {
286  cerr << "dup of stdout failed" << endl;
287  return NULL;
288  }
289  close(hStdIn);
290  close(hStdOut);
291 
292  // The parent keeps only the write end of the pipe; the child reads
293  // from the other end.
294  close(data[0]);
295  FILE *input = fdopen(data[1], "w");
296  setbuf(input, 0);
297  childpid = pid;
298  return input;
299 
300 #else
301  FILE *ret_file = NULL ;
302 
303  int pid, data[2];
304 
305  if (pipe(data) < 0) {
306  cerr << "Could not create IPC channel for compressor process"
307  << endl;
308  return NULL;
309  }
310 
311  if ((pid = fork()) < 0) {
312  cerr << "Could not fork to create compressor process" << endl;
313  return NULL;
314  }
315 
316  // The parent process closes the read end of the pipe and creates a
317  // FILE * on the write end using fdopen(). The calling program writes
318  // to that FILE *; the child reads what is written from the pipe.
319 
320  if (pid > 0) { // Parent, pid is that of the child
321  close(data[0]);
322  ret_file = fdopen(data[1], "w");
323  setbuf(ret_file, 0);
324  childpid = pid;
325  }
326  else { // Child
327  close(data[1]);
328  dup2(data[0], 0); // Read from the pipe...
329  dup2(fileno(output), 1); // Write to the FILE *output.
330 
331  DBG(cerr << "Opening compression stream." << endl);
332 
333  // First try to run deflate from <libdap_root()>/sbin. If that exec
334  // fails, try the CWD. execl() only returns on failure, so each
335  // following statement runs only when the previous exec failed.
336  string deflate = (string)libdap_root() + "/sbin/deflate";
337  (void) execl(deflate.c_str(), "deflate", "-c", "5", "-s", NULL);
338  (void) execl("./deflate", "deflate", "-c", "5", "-s", NULL);
339  cerr << "Warning: Could not start compressor!" << endl;
 // NOTE(review): the message below misspells "deflate" and names
 // DODS_ROOT/etc, while the code above looks in <root>/sbin.
340  cerr << "defalte should be in DODS_ROOT/etc or in the CWD!"
341  << endl;
342  _exit(127); // Only here if an error occurred.
343  }
344 
345  return ret_file ;
346 #endif
347 }
348 
349 #endif // COMPRESSION_FOR_SERVER3
350 
/**
 * Test whether a pathname names an existing directory.
 *
 * @param dir The pathname to test.
 * @return True if stat() succeeds on the pathname and reports a directory,
 * false otherwise.
 */
bool
dir_exists(const std::string &dir)
{
    struct stat buf;

    if (stat(dir.c_str(), &buf) != 0)
        return false;

    // Compare the whole file-type field. Testing the S_IFDIR bit alone also
    // succeeds for block devices, whose type value contains that bit.
    return (buf.st_mode & S_IFMT) == S_IFDIR;
}
364 
365 #if 0
366 
367 // UNTESTED 11/7/12
368 
/**
 * Test whether a file can be created and written in a directory.
 *
 * The test creates (or truncates), writes and then removes the file
 * "test.txt" in the directory, so an existing file with that name is lost.
 *
 * @param dir The directory to test.
 * @return True if the file could be opened and written, false otherwise.
 */
bool
dir_writable(const std::string &dir)
{
    const std::string test = dir + "/test.txt";

    // Open the test file, not the directory itself. ofstream reports errors
    // through its state rather than by throwing, so check the state.
    std::ofstream ofs(test.c_str());
    if (!ofs)
        return false;

    ofs.write("test", 4);
    ofs.close();
    const bool written = !ofs.fail();

    unlink(test.c_str());
    return written;
}
391 #endif
392 
393 // This function returns the system time as a string formatted for an httpd
394 // log file.
395 
// Returns the current time as text produced by ctime(), or the string
// "time() error" when time() fails.
// NOTE(review): the declarator line (listing line 397) is missing from this
// listing, so the function's name is not visible here.
396 string
398 {
399  time_t TimBin;
400 
401  if (time(&TimBin) == (time_t) - 1)
402  return string("time() error");
403  else {
404  string TimStr = ctime(&TimBin);
 // NOTE(review): size() - 2 drops the last TWO characters. If ctime()'s
 // text ends in a single '\n', this also removes the final digit of the
 // year -- confirm whether size() - 1 was intended.
405  return TimStr.substr(0, TimStr.size() - 2); // remove the \n
406  }
407 }
408 
/**
 * Convert a string to lower case in place, using tolower() on each byte.
 *
 * @param s Value-result parameter: the string to convert.
 */
void
downcase(std::string &s)
{
    for (std::string::size_type i = 0; i < s.length(); ++i) {
        // tolower() is only defined for EOF and values representable as
        // unsigned char; a plain char with the high bit set may be negative.
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
    }
}
415 
/**
 * Test whether a string is enclosed in double quotes.
 *
 * @param s The string to test.
 * @return True if the string is at least two characters long and both its
 * first and last characters are a double quote. A lone `"' is not quoted,
 * because one character cannot be both the opening and the closing quote.
 */
bool
is_quoted(const std::string &s)
{
    return s.length() >= 2 && s[0] == '\"' && s[s.length() - 1] == '\"';
}
421 
422 string
423 remove_quotes(const string &s)
424 {
425  if (is_quoted(s))
426  return s.substr(1, s.length() - 2);
427  else
428  return s;
429 }
430 
431 #ifdef WIN32
432 // Sometimes need to buffer within an iostream under win32 when
433 // we want the output to go to a FILE *. This is because
434 // it's not possible to associate an ofstream with a FILE *
435 // under the Standard ANSI C++ Library spec. Unix systems
436 // don't follow the spec in this regard.
437 void flush_stream(iostream ios, FILE *out)
438 {
439  int nbytes;
440  char buffer[512];
441 
442  ios.get(buffer, 512, NULL);
443  while ((nbytes = ios.gcount()) > 0) {
444  fwrite(buffer, 1, nbytes, out);
445  ios.get(buffer, 512, NULL);
446  }
447 
448  return;
449 }
450 #endif
451 
452 // Jose Garcia
/**
 * Append the text form of an integer, written in the given base, to a string.
 *
 * Digits above 9 are written as upper-case letters. Negative values are
 * written as a minus sign followed by the magnitude.
 *
 * @param val The value to convert.
 * @param base The number base, in the range [2, 36].
 * @param str_val Value-result parameter: the digits are appended to it.
 * @exception std::invalid_argument if base is outside [2, 36]; str_val is
 * not modified in that case.
 */
void
append_long_to_string(long val, int base, std::string &str_val)
{
    // The 36 possible digits for bases in the range [2, 36].
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        throw std::invalid_argument("The parameter base has an invalid value.");
    }

    // Work on the magnitude as an unsigned value: negating LONG_MIN as a
    // signed long overflows, while unsigned negation is well defined.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Produce the digits least-significant first, then append them reversed.
    const unsigned long radix = static_cast<unsigned long>(base);
    std::string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}
480 
481 // base defaults to 10
482 string
483 long_to_string(long val, int base)
484 {
485  string s;
486  append_long_to_string(val, base, s);
487  return s;
488 }
489 
490 // Jose Garcia
/**
 * Append the text form of a double to a string.
 *
 * The number is formatted by an output string stream using the stream's
 * default floating-point notation with a precision of 9.
 *
 * @param num The value to convert.
 * @param str Value-result parameter: the text is appended to it.
 */
void append_double_to_string(const double &num, std::string &str)
{
    std::ostringstream formatter;
    formatter.precision(9);
    formatter << num;

    str.append(formatter.str());
}
500 
501 string
502 double_to_string(const double &num)
503 {
504  string s;
505  append_double_to_string(num, s);
506  return s;
507 }
508 
509 // Get the version number of the core software. Defining this means that
510 // clients of the DAP don't have to rely on config.h for the version
511 // number.
// Builds "OPeNDAP DAP/<libdap_version()>: compiled on <__DATE__>:<__TIME__>".
// __DATE__ and __TIME__ expand when this file is compiled.
// NOTE(review): the declarator line (listing line 513) is missing from this
// listing, so the function's name is not visible here.
512 string
514 {
515  return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ;
516 }
517 
518 // Given a pathname, return the file at the end of the path. This is used
519 // when reporting errors (maybe other times, too) to keep the server from
520 // revealing too much about its organization when sending error responses
521 // back to clients. 10/11/2000 jhrg
522 // MT-safe. 08/05/02 jhrg
523 
// The path separator for the platform this file is compiled for.
#ifdef WIN32
static const char path_sep[] = "\\";
#else
static const char path_sep[] = "/";
#endif

/**
 * Return the part of a pathname that follows its last path separator.
 *
 * @param path The pathname.
 * @return The text after the last separator (empty when the pathname ends
 * with one), or the whole pathname when it holds no separator.
 */
std::string
path_to_filename(std::string path)
{
    const std::string::size_type last_sep = path.rfind(path_sep);
    if (last_sep == std::string::npos)
        return path;

    return path.substr(last_sep + 1);
}
541 
/**
 * Read the whole contents of a stream into a string.
 *
 * The stream is rewound first, so its contents are read from the start
 * regardless of the current position. The stream is left positioned where
 * reading stopped.
 *
 * @param fp The stream to read; must be readable and seekable.
 * @return The bytes read up to end-of-file or the first read error. An empty
 * string is returned for a NULL stream.
 */
std::string
file_to_string(FILE *fp)
{
    if (!fp)
        return std::string();

    rewind(fp);

    // Read in blocks instead of issuing one fread() call per byte.
    std::string contents;
    char chunk[4096];
    size_t got;
    while ((got = fread(chunk, 1, sizeof chunk, fp)) > 0)
        contents.append(chunk, got);

    return contents;
}
556 
557 #if 0
558 // This code is not used.
// Match `string' against the wildcard pattern `wild', where `*' matches any
// run of characters (including none) and `?' matches any single character.
// Returns 1 on a match and 0 otherwise; a null argument yields 0.
559 int
560 wildcmp(const char *wild, const char *string)
561 {
562  // Written by Jack Handy - jakkhandy@hotmail.com
563 
564  if (!wild || !string)
565  return 0;
566 
 // mp: the pattern position just after the most recent `*'.
 // cp: the string position from which to retry after a mismatch.
567  const char *cp = NULL, *mp = NULL;
568 
 // Match the literal prefix of the pattern, up to its first `*'.
569  while ((*string) && (*wild != '*')) {
570  if ((*wild != *string) && (*wild != '?')) {
571  return 0;
572  }
573  wild++;
574  string++;
575  }
576 
 // This loop is only entered with *wild == '*', so mp and cp are set
 // before the backtracking branch can read them.
577  while (*string) {
578  if (*wild == '*') {
 // A trailing `*' matches whatever is left of the string.
579  if (!*++wild) {
580  return 1;
581  }
582  mp = wild;
583  cp = string+1;
584  } else if ((*wild == *string) || (*wild == '?')) {
585  wild++;
586  string++;
587  } else {
 // Mismatch: let the last `*' absorb one more character and retry.
588  wild = mp;
589  string = cp++;
590  }
591  }
592 
 // The string is exhausted; only `*' may remain in the pattern.
593  while (*wild == '*') {
594  wild++;
595  }
596  return !*wild;
597 }
598 #endif
599 
#define CHECK_BIT( tab, bit ) ( tab[ (bit)/8 ] & (1<<( (bit)%8 )) )
/* Bytes in a [chars] bitmap: one bit for each of the 256 values an
   unsigned char can hold. A smaller table is indexed out of bounds by
   characters above 127. */
#define BITLISTSIZE 32

static void globchars( const char *s, const char *e, char *b );

/*
 * glob: match a string against a simple pattern
 *
 * Understands the following patterns:
 *
 * *      any number of characters
 * ?      any single character
 * [a-z]  any single character in the range a-z
 * [^a-z] any single character not in the range a-z
 * \x     match x
 *
 * A `]' that directly follows `[' is taken literally and does not close the
 * character class.
 *
 * @param c The pattern
 * @param s The string
 * @return 0 on success, -1 if the pattern is exhausted but there are
 * characters remaining in the string, and a positive value if the pattern
 * does not match (including when either argument is null). The positive
 * value is the number of pattern tokens examined by the call that found the
 * mismatch; callers should only test its sign.
 */

int
glob(const char *c, const char *s)
{
    if (!c || !s)
        return 1;

    char bitlist[BITLISTSIZE];
    int i = 0;
    for (;;) {
        ++i;
        switch (*c++) {
        case '\0':
            return *s ? -1 : 0;

        case '?':
            if (!*s++)
                return i/*1*/;
            break;

        case '[': {
            /* Scan for the matching ]. The first character of the class is
               stepped over before the test, so a leading ] is literal. */

            const char *here = c;
            do {
                if (!*c++)
                    return i/*1*/;
            } while (*c != ']');
            c++;

            /* Build the character class bitlist from the characters between
               the brackets; c - 1 is the closing ], which is not part of
               the class. */

            globchars(here, c - 1, bitlist);

            /* Bit 0 is never set, so an exhausted string fails here. */
            if (!CHECK_BIT( bitlist, *(unsigned char *)s ))
                return i/*1*/;
            s++;
            break;
        }

        case '*': {
            const char *here = s;

            while (*s)
                s++;

            /* Try to match the rest of the pattern in a recursive */
            /* call. If the match fails we'll back up chars, retrying. */

            while (s != here) {
                int r;

                /* A fast path for the last token in a pattern */

                r = *c ? glob(c, s) : *s ? -1 : 0;

                if (!r)
                    return 0;
                else if (r < 0)
                    return i/*1*/;

                --s;
            }
            /* Every non-empty run failed; carry on with `*' matching
               nothing. */
            break;
        }

        case '\\':
            /* Force literal match of next char. */

            if (!*c || *s++ != *c++)
                return i/*1*/;
            break;

        default:
            if (*s++ != c[-1])
                return i/*1*/;
            break;
        }
    }
}

/*
 * globchars() - build a bitlist to check for character group match
 *
 * s points at the first character after `[', e at the closing `]' (not
 * included), and b at BITLISTSIZE bytes that receive one bit per character
 * value. A leading `^' negates the class. The NUL character is never a
 * member.
 */

static void globchars(const char *s, const char *e, char *b) {
    int neg = 0;

    memset(b, '\0', BITLISTSIZE);

    if (*s == '^')
        neg++, s++;

    while (s < e) {
        int c;

        /* Go through unsigned char so that characters above 127 give
           indexes in 0..255 rather than negative values. */
        if (s + 2 < e && s[1] == '-') {
            const int last = (unsigned char) s[2];
            for (c = (unsigned char) s[0]; c <= last; c++)
                b[c / 8] |= (1 << (c % 8));
            s += 3;
        }
        else {
            c = (unsigned char) *s++;
            b[c / 8] |= (1 << (c % 8));
        }
    }

    if (neg) {
        int i;
        for (i = 0; i < BITLISTSIZE; i++)
            b[i] ^= 0377;
    }

    /* Don't include \0 in either $[chars] or $[^chars] */

    b[0] &= 0376;
}
738 
/**
 * Match a string against a wildcard pattern.
 *
 * In the pattern `*' matches any run of characters (including none), `?'
 * matches exactly one character and every other character matches itself.
 *
 * The match is done iteratively with a single backtracking point (the most
 * recent `*'), so it takes at most O(length(pat) * length(s)) steps and no
 * recursion.
 *
 * @param pat The pattern.
 * @param s The string.
 * @return 1 if the whole string matches the pattern, 0 otherwise. A null
 * argument yields 0.
 */
int wmatch(const char *pat, const char *s)
{
    if (!pat || !s)
        return 0;

    const char *star = NULL;   // position of the most recent `*' in pat
    const char *retry = NULL;  // where in s that `*' currently stops matching

    while (*s != '\0') {
        if (*pat == '*') {
            // Start with the star matching nothing; remember where to grow.
            star = pat++;
            retry = s;
        }
        else if (*pat == '?' || *pat == *s) {
            ++pat;
            ++s;
        }
        else if (star) {
            // Mismatch: let the last star absorb one more character.
            pat = star + 1;
            s = ++retry;
        }
        else {
            return 0;
        }
    }

    // The string is exhausted; only stars may remain in the pattern.
    while (*pat == '*')
        ++pat;

    return *pat == '\0';
}
751 
754 
/**
 * Conservative overflow check for an allocation of nelem elements of sz
 * bytes each.
 *
 * @param sz Size of one element.
 * @param nelem Number of elements.
 * @return True if sz is non-zero and nelem is strictly less than
 * UINT_MAX / sz, false otherwise.
 */
bool
size_ok(unsigned int sz, unsigned int nelem)
{
    if (sz == 0)
        return false;

    const unsigned int element_limit = UINT_MAX / sz;
    return nelem < element_limit;
}
765 
782 bool
783 pathname_ok(const string &path, bool strict)
784 {
785  if (path.length() > 255)
786  return false;
787 
788  Regex name("[-0-9A-z_./]+");
789  if (!strict)
790  name = "[:print:]+";
791 
792  string::size_type len = path.length();
793  int result = name.match(path.c_str(), len);
794  // Protect against casting too big an uint to int
795  // if LEN is bigger than the max int32, the second test can't work
796  if (len > INT_MAX || result != static_cast<int>(len))
797  return false;
798 
799  return true;
800 }
801 
803 
804 } // namespace libdap
805