OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESCatalogUtils.cc
Go to the documentation of this file.
1 // BESCatalogUtils.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <dirent.h>
38 
39 #include <cerrno>
40 #include <iostream>
41 #include <sstream>
42 #include <list>
43 #include <cstring>
44 
45 using std::cout ;
46 using std::endl ;
47 using std::ostringstream ;
48 using std::list ;
49 
50 #include "BESCatalogUtils.h"
51 #include "BESCatalogList.h"
52 #include "TheBESKeys.h"
53 #include "BESInternalError.h"
54 #include "BESSyntaxUserError.h"
55 #include "BESNotFoundError.h"
56 #include "BESRegex.h"
57 #include "BESUtil.h"
58 #include "BESDapNames.h"
59 #include "BESInfo.h"
61 #include "BESContainerStorage.h"
62 #include "BESCatalogEntry.h"
63 
64 map<string, BESCatalogUtils *> BESCatalogUtils::_instances ;
65 
66 BESCatalogUtils::
67 BESCatalogUtils( const string &n )
68  : _name( n ), _follow_syms( false )
69 {
70  string key = "BES.Catalog." + n + ".RootDirectory" ;
71  bool found = false ;
72  TheBESKeys::TheKeys()->get_value( key, _root_dir, found ) ;
73  if( !found || _root_dir == "" )
74  {
75  string s = key + " not defined in BES configuration file" ;
76  throw BESSyntaxUserError( s, __FILE__, __LINE__ ) ;
77  }
78  DIR *dip = opendir( _root_dir.c_str() ) ;
79  if( dip == NULL )
80  {
81  string serr = "BESCatalogDirectory - root directory "
82  + _root_dir + " does not exist" ;
83  throw BESNotFoundError( serr, __FILE__, __LINE__ ) ;
84  }
85  closedir( dip ) ;
86 
87  found = false ;
88  key = (string)"BES.Catalog." + n + ".Exclude" ;
89  vector<string> vals ;
90  TheBESKeys::TheKeys()->get_values( key, vals, found ) ;
91  vector<string>::iterator ei = vals.begin() ;
92  vector<string>::iterator ee = vals.end() ;
93  for( ; ei != ee; ei++ )
94  {
95  string e_str = (*ei) ;
96  if( !e_str.empty() && e_str != ";" )
97  BESUtil::explode( ';', e_str, _exclude ) ;
98  }
99 
100  key = (string)"BES.Catalog." + n + ".Include" ;
101  vals.clear() ;
102  TheBESKeys::TheKeys()->get_values( key, vals, found ) ;
103  vector<string>::iterator ii = vals.begin() ;
104  vector<string>::iterator ie = vals.end() ;
105  for( ; ii != ie; ii++ )
106  {
107  string i_str = (*ii) ;
108  if( !i_str.empty() && i_str != ";" )
109  BESUtil::explode( ';', i_str, _include ) ;
110  }
111 
112  key = "BES.Catalog." + n + ".TypeMatch" ;
113  list<string> match_list ;
114  vals.clear() ;
115  TheBESKeys::TheKeys()->get_values( key, vals, found ) ;
116  if( !found || vals.size() == 0 )
117  {
118  string s = key + " not defined in key file" ;
119  throw BESInternalError( s, __FILE__, __LINE__ ) ;
120  }
121  vector<string>::iterator vi = vals.begin() ;
122  vector<string>::iterator ve = vals.end() ;
123  for( ; vi != ve; vi++ )
124  {
125  BESUtil::explode( ';', (*vi), match_list ) ;
126  }
127 
128  list<string>::iterator mli = match_list.begin() ;
129  list<string>::iterator mle = match_list.end() ;
130  for( ; mli != mle; mli++ )
131  {
132  if( !((*mli).empty()) && *(mli) != ";" )
133  {
134  list<string> amatch ;
135  BESUtil::explode( ':', (*mli), amatch ) ;
136  if( amatch.size() != 2 )
137  {
138  string s = (string)"Catalog type match malformed, "
139  + "looking for type:regexp;[type:regexp;]" ;
140  throw BESInternalError( s, __FILE__, __LINE__ ) ;
141  }
142  list<string>::iterator ami = amatch.begin() ;
143  type_reg newval ;
144  newval.type = (*ami) ;
145  ami++ ;
146  newval.reg = (*ami) ;
147  _match_list.push_back( newval ) ;
148  }
149  }
150 
151  key = (string)"BES.Catalog." + n + ".FollowSymLinks" ;
152  string s_str ;
153  TheBESKeys::TheKeys()->get_value( key, s_str, found ) ;
154  s_str = BESUtil::lowercase( s_str ) ;
155  if( s_str == "yes" || s_str == "on" || s_str == "true" )
156  {
157  _follow_syms = true ;
158  }
159 }
160 
161 #if 0
162 // While working on the gdal handler I was getting leak reports from BESCatalogUtils
163 // and I thought freeing the memory here might help. It didn't, but I thought I'd
164 // leave this in place just in case it's needed and/or as a reminder that valgrind
165 // is reporting memory 'definitely lost' from this class. jhrg 7/26/12
167 {
168  BESCatalogUtils *utils = BESCatalogUtils::_instances[_name] ;
169  if( !utils )
170  {
171  delete utils;
172  BESCatalogUtils::_instances[_name] = 0 ;
173  }
174 }
175 #endif
176 
177 bool
178 BESCatalogUtils::include( const string &inQuestion ) const
179 {
180  bool toInclude = false ;
181 
182  // First check the file against the include list. If the file should be
183  // included then check the exclude list to see if there are exceptions
184  // to the include list.
185  if( _include.size() == 0 )
186  {
187  toInclude = true ;
188  }
189  else
190  {
191  list<string>::const_iterator i_iter = _include.begin() ;
192  list<string>::const_iterator i_end = _include.end() ;
193  for( ; i_iter != i_end; i_iter++ )
194  {
195  string reg = *i_iter ;
196  if( !reg.empty() )
197  {
198  try
199  {
200  // must match exactly, meaing result is = to length of string
201  // in question
202  BESRegex reg_expr( reg.c_str() ) ;
203  if( reg_expr.match( inQuestion.c_str(),
204  inQuestion.length() ) ==
205  static_cast<int>(inQuestion.length()) )
206  {
207  toInclude = true ;
208  }
209  }
210  catch( BESError &e )
211  {
212  string serr = (string)"Unable to get catalog information, "
213  + "malformed Catalog Include parameter "
214  + "in bes configuration file around "
215  + reg + ": " + e.get_message() ;
216  throw BESInternalError( serr, __FILE__, __LINE__ ) ;
217  }
218  }
219  }
220  }
221 
222  if( toInclude == true )
223  {
224  if( exclude( inQuestion ) )
225  {
226  toInclude = false ;
227  }
228  }
229 
230  return toInclude ;
231 }
232 
233 bool
234 BESCatalogUtils::exclude( const string &inQuestion ) const
235 {
236  list<string>::const_iterator e_iter = _exclude.begin() ;
237  list<string>::const_iterator e_end = _exclude.end() ;
238  for( ; e_iter != e_end; e_iter++ )
239  {
240  string reg = *e_iter ;
241  if( !reg.empty() )
242  {
243  try
244  {
245  BESRegex reg_expr( reg.c_str() ) ;
246  if( reg_expr.match( inQuestion.c_str(), inQuestion.length() ) ==
247  static_cast<int>(inQuestion.length()) )
248  {
249  return true ;
250  }
251  }
252  catch( BESError &e )
253  {
254  string serr = (string)"Unable to get catalog information, "
255  + "malformed Catalog Exclude parameter "
256  + "in bes configuration file around "
257  + reg + ": " + e.get_message() ;
258  throw BESInternalError( serr, __FILE__, __LINE__ ) ;
259  }
260  }
261  }
262  return false ;
263 }
264 
267 {
268  return _match_list.begin() ;
269 }
270 
273 {
274  return _match_list.end() ;
275 }
276 
277 unsigned int
278 BESCatalogUtils::get_entries( DIR *dip, const string &fullnode,
279  const string &use_node, const string &coi,
280  BESCatalogEntry *entry,
281  bool dirs_only )
282 {
283  unsigned int cnt = 0 ;
284  struct stat cbuf ;
285  int statret = stat( fullnode.c_str(), &cbuf ) ;
286  int my_errno = errno ;
287  if( statret == 0 )
288  {
289  struct dirent *dit;
290  struct stat buf;
291  struct stat lbuf;
292 
293  while( ( dit = readdir( dip ) ) != NULL )
294  {
295  string dirEntry = dit->d_name ;
296  if( dirEntry != "." && dirEntry != ".." )
297  {
298  string fullPath = fullnode + "/" + dirEntry ;
299 
300  // if follow_sym_links is true then continue with
301  // the checking. If false, first see if the entry is
302  // a symbolic link. If it is, do not include in the
303  // listing for this node. If not, then continue
304  // checking the entry.
305  bool continue_checking = true ;
306  if( follow_sym_links() == false )
307  {
308 #if 0
309  int lstatret = lstat( fullPath.c_str(), &lbuf ) ;
310 #endif
311  (void)lstat( fullPath.c_str(), &lbuf ) ;
312  if( S_ISLNK( lbuf.st_mode ) )
313  {
314  continue_checking = false ;
315  }
316  }
317 
318  if( continue_checking )
319  {
320  // look at the mode and determine if this is a
321  // directory or a regular file. If it is not
322  // accessible, the stat fails, is not a directory
323  // or regular file, then simply do not include it.
324  statret = stat( fullPath.c_str(), &buf ) ;
325  if ( statret == 0 && S_ISDIR( buf.st_mode ) )
326  {
327  if( exclude( dirEntry ) == false )
328  {
329  BESCatalogEntry *curr_entry =
330  new BESCatalogEntry( dirEntry,
331  entry->get_catalog() ) ;
332 
333  bes_get_stat_info( curr_entry, buf ) ;
334 
335  entry->add_entry( curr_entry ) ;
336 
337  // we don't go further then this, so we need
338  // to add a blank node here so that we know
339  // it's a node (collection)
340  BESCatalogEntry *blank_entry =
341  new BESCatalogEntry( ".blank",
342  entry->get_catalog() ) ;
343  curr_entry->add_entry( blank_entry ) ;
344  }
345  }
346  else if ( statret == 0 && S_ISREG( buf.st_mode ) )
347  {
348  if( !dirs_only && include( dirEntry ) )
349  {
350  BESCatalogEntry *curr_entry =
351  new BESCatalogEntry( dirEntry,
352  entry->get_catalog() ) ;
353  bes_get_stat_info( curr_entry, buf ) ;
354 
355  list<string> services ;
356  isData( fullPath, _name, services ) ;
357  curr_entry->set_service_list( services ) ;
358 
359  bes_get_stat_info( curr_entry, buf ) ;
360 
361  entry->add_entry( curr_entry ) ;
362  }
363  }
364  }
365  }
366  }
367  }
368  else
369  {
370  // ENOENT means that the path or part of the path does not exist
371  if( my_errno == ENOENT )
372  {
373  string error = "Node " + use_node + " does not exist" ;
374  char *s_err = strerror( my_errno ) ;
375  if( s_err )
376  {
377  error = s_err ;
378  }
379  throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
380  }
381  // any other error means that access is denied for some reason
382  else
383  {
384  string error = "Access denied for node " + use_node ;
385  char *s_err = strerror( my_errno ) ;
386  if( s_err )
387  {
388  error = error + s_err ;
389  }
390  throw BESNotFoundError( error, __FILE__, __LINE__ ) ;
391  }
392  }
393  return cnt ;
394 }
395 
396 void
398 {
399  string defcatname = BESCatalogList::TheCatalogList()->default_catalog() ;
400 
401  // start with the external entry
402  map<string,string> props ;
403  if( entry->get_catalog() == defcatname )
404  {
405  props["name"] = entry->get_name() ;
406  }
407  else
408  {
409  string name = entry->get_catalog() + "/" ;
410  if( entry->get_name() != "/" )
411  {
412  name = name + entry->get_name() ;
413  }
414  props["name"] = name ;
415  }
416  props["catalog"] = entry->get_catalog() ;
417  props["size"] = entry->get_size() ;
418  props["lastModified"] = entry->get_mod_date() + "T" + entry->get_mod_time();
419  if( entry->is_collection() )
420  {
421  props["node"] = "true" ;
422  ostringstream strm ;
423  strm << entry->get_count() ;
424  props["count"] = strm.str() ;
425  }
426  else
427  {
428  props["node"] = "false" ;
429  }
430  info->begin_tag( "dataset", &props ) ;
431 
432  list<string> services = entry->get_service_list() ;
433  if( services.size() )
434  {
435  list<string>::const_iterator si = services.begin() ;
436  list<string>::const_iterator se = services.end() ;
437  for( ; si != se; si++ )
438  {
439  info->add_tag( "serviceRef", (*si) ) ;
440  }
441  }
442 }
443 
444 void
446  const string &fullnode )
447 {
448  struct stat cbuf ;
449  int statret = stat( fullnode.c_str(), &cbuf ) ;
450  if( statret == 0 )
451  {
452  bes_get_stat_info( entry, cbuf ) ;
453  }
454 }
455 
456 void
457 BESCatalogUtils::bes_get_stat_info( BESCatalogEntry *entry,
458  struct stat &buf )
459 {
460  off_t sz = buf.st_size ;
461  entry->set_size( sz ) ;
462 
463  // %T = %H:%M:%S
464  // %F = %Y-%m-%d
465  time_t mod = buf.st_mtime ;
466  struct tm *stm = gmtime( &mod ) ;
467  char mdate[64] ;
468  strftime( mdate, 64, "%Y-%m-%d", stm ) ;
469  char mtime[64] ;
470  strftime( mtime, 64, "%T", stm ) ;
471 
472  ostringstream sdt ;
473  sdt << mdate ;
474  entry->set_mod_date( sdt.str() ) ;
475 
476  ostringstream stt ;
477  stt << mtime ;
478  entry->set_mod_time( stt.str() ) ;
479 }
480 
// Ask a container storage whether inQuestion is data and which services
// are available for it.
//
// Returns false when no storage was obtained (store is null); otherwise
// returns the result of the storage's own isData( inQuestion, services ).
//
// NOTE(review): the embedded listing numbers jump from 486 to 488, so the
// right-hand side of the 'store' initialisation is missing from this copy
// of the function - presumably a lookup of the storage by 'catalog';
// confirm against the original file.
481 bool
482 BESCatalogUtils::isData( const string &inQuestion,
483  const string &catalog,
484  list<string> &services )
485 {
486  BESContainerStorage *store =
488  if( !store )
489  return false ;
490 
491  return store->isData( inQuestion, services ) ;
492 }
493 
// Write a human-readable description of this object to strm: the address
// of the instance, the root directory, the non-empty include and exclude
// patterns, the (type : regular expression) matches and whether symbolic
// links are followed. Each list that has no elements is reported as
// "empty" instead.
//
// NOTE(review): the embedded listing numbers skip 499, 506, 516, 526, 536,
// 546, 555 and 571, so statements are missing from this copy of the
// function - presumably the BESIndent::Indent() / UnIndent() calls that
// nest the output; confirm against the original file.
494 void
495 BESCatalogUtils::dump( ostream &strm ) const
496 {
497  strm << BESIndent::LMarg << "BESCatalogUtils::dump - ("
498  << (void *)this << ")" << endl ;
500 
501  strm << BESIndent::LMarg << "root directory: " << _root_dir << endl ;
502 
503  if( _include.size() )
504  {
505  strm << BESIndent::LMarg << "include list:" << endl ;
507  list<string>::const_iterator i_iter = _include.begin() ;
508  list<string>::const_iterator i_end = _include.end() ;
509  for( ; i_iter != i_end; i_iter++ )
510  {
      // empty patterns are not printed
511  if( !(*i_iter).empty() )
512  {
513  strm << BESIndent::LMarg << *i_iter << endl ;
514  }
515  }
517  }
518  else
519  {
520  strm << BESIndent::LMarg << "include list: empty" << endl ;
521  }
522 
523  if( _exclude.size() )
524  {
525  strm << BESIndent::LMarg << "exclude list:" << endl ;
527  list<string>::const_iterator e_iter = _exclude.begin() ;
528  list<string>::const_iterator e_end = _exclude.end() ;
529  for( ; e_iter != e_end; e_iter++ )
530  {
      // empty patterns are not printed
531  if( !(*e_iter).empty() )
532  {
533  strm << BESIndent::LMarg << *e_iter << endl ;
534  }
535  }
537  }
538  else
539  {
540  strm << BESIndent::LMarg << "exclude list: empty" << endl ;
541  }
542 
543  if( _match_list.size() )
544  {
545  strm << BESIndent::LMarg << "type matches:" << endl ;
547  BESCatalogUtils::match_citer i = _match_list.begin() ;
548  BESCatalogUtils::match_citer ie = _match_list.end() ;
549  for( ; i != ie; i++ )
550  {
      // one line per pair: "<type> : <regular expression>"
551  type_reg match = (*i) ;
552  strm << BESIndent::LMarg << match.type << " : "
553  << match.reg << endl ;
554  }
556  }
557  else
558  {
559  strm << BESIndent::LMarg << " type matches: empty" << endl ;
560  }
561 
562  if( _follow_syms )
563  {
564  strm << BESIndent::LMarg << " follow symbolic links: on" << endl ;
565  }
566  else
567  {
568  strm << BESIndent::LMarg << " follow symbolic links: off" << endl ;
569  }
570 
572 }
573 
575 BESCatalogUtils::Utils( const string &cat_name )
576 {
577  BESCatalogUtils *utils = BESCatalogUtils::_instances[cat_name] ;
578  if( !utils )
579  {
580  utils = new BESCatalogUtils( cat_name );
581  BESCatalogUtils::_instances[cat_name] = utils ;
582  }
583  return utils ;
584 }
585 
virtual list< string > get_service_list()
error thrown if the resource requested cannot be found
provides persistent storage for data storage information represented by a container.
BESCatalogUtils::match_citer match_list_begin() const
virtual ~BESCatalogUtils()
BESCatalogUtils::match_citer match_list_end() const
virtual string get_catalog()
exception thrown if internal error encountered
virtual unsigned int get_count()
virtual BESContainerStorage * find_persistence(const string &persist_name)
find the persistence store with the given name
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:190
virtual void add_tag(const string &tag_name, const string &tag_data, map< string, string > *attrs=0)=0
a C++ interface to POSIX regular expression functions.
Definition: BESRegex.h:41
virtual void set_mod_date(const string &mod_date)
static void Indent()
Definition: BESIndent.cc:38
error thrown if there is a user syntax error in the request or any other user error ...
virtual void set_service_list(list< string > &slist)
vector< type_reg >::const_iterator match_citer
virtual void set_mod_time(const string &mod_time)
virtual string get_message()
get the error message for this exception
Definition: BESError.h:91
virtual bool isData(const string &inQuestion, list< string > &provides)=0
determine if the given container is data and what services are available for it
virtual void add_entry(BESCatalogEntry *entry)
informational response object
Definition: BESInfo.h:68
Abstract exception class for the BES with basic string message.
Definition: BESError.h:51
static void bes_add_stat_info(BESCatalogEntry *entry, const string &fullnode)
virtual string default_catalog()
virtual string get_name()
virtual void begin_tag(const string &tag_name, map< string, string > *attrs=0)
Definition: BESInfo.cc:142
static ostream & LMarg(ostream &strm)
Definition: BESIndent.cc:73
bool follow_sym_links() const
static void explode(char delim, const string &str, list< string > &values)
explode a string into an array given a delimiter
Definition: BESUtil.cc:513
virtual void dump(ostream &strm) const
dump the contents of this object to the specified ostream
static bool isData(const string &inQuestion, const string &catalog, list< string > &services)
virtual string get_mod_time()
static void display_entry(BESCatalogEntry *entry, BESInfo *info)
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:453
static BESContainerStorageList * TheList()
virtual void set_size(off_t size)
virtual bool include(const string &inQuestion) const
void get_values(const string &s, vector< string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: BESKeys.cc:488
virtual string get_size()
virtual bool exclude(const string &inQuestion) const
static void UnIndent()
Definition: BESIndent.cc:44
static BESCatalogUtils * Utils(const string &name)
virtual unsigned int get_entries(DIR *dip, const string &fullnode, const string &use_node, const string &coi, BESCatalogEntry *entry, bool dirs_only)
virtual bool is_collection()
static BESCatalogList * TheCatalogList()
returns the singleton BESCatalogList instance
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:48
virtual string get_mod_date()