OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESCache.cc
Go to the documentation of this file.
1 // BESCache.cc
2 
3 // This file is part of bes, A C++ back-end server implementation framework
4 // for the OPeNDAP Data Access Protocol.
5 
6 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
7 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact University Corporation for Atmospheric Research at
24 // 3080 Center Green Drive, Boulder, CO 80301
25 
26 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
27 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
28 //
29 // Authors:
30 // pwest Patrick West <pwest@ucar.edu>
31 // jgarcia Jose Garcia <jgarcia@ucar.edu>
32 
33 #include "config.h"
34 
35 #include <unistd.h> // for unlink
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 #include <fcntl.h>
40 
41 #include <cstring>
42 #include <cerrno>
43 #include <iostream>
44 #include <sstream>
45 
46 #include "BESCache.h"
47 #include "TheBESKeys.h"
48 #include "BESSyntaxUserError.h"
49 #include "BESInternalError.h"
50 #include "BESDebug.h"
51 
52 using std::string;
53 using std::multimap ;
54 using std::pair ;
55 using std::greater ;
56 using std::endl ;
57 
// Conversion factor: number of bytes in one megabyte (2^20). The cache size
// is configured in megabytes and compared against file sizes in bytes.
static const unsigned long long BYTES_PER_MEG = 1048576ULL;

// Largest cache size, in megabytes, that a user may configure.
// With a 64-bit unsigned long long the largest byte count is 2^64 - 1, so the
// largest megabyte count whose byte equivalent still fits is 2^44 - 1.
// 2^44 itself must be rejected: 2^44 * BYTES_PER_MEG wraps around to zero.
static const unsigned long long MAX_CACHE_SIZE_IN_MEGABYTES = (1ULL << 44) - 1ULL;
64 
65 void
66 BESCache::check_ctor_params()
67 {
68  if( _cache_dir.empty() )
69  {
70  string err = "The cache directory was not specified, must be non-empty";
71  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
72  }
73 
74  struct stat buf;
75  int statret = stat( _cache_dir.c_str(), &buf ) ;
76  if( statret != 0 || ! S_ISDIR(buf.st_mode) )
77  {
78  string err = "The cache directory " + _cache_dir + " does not exist" ;
79  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
80  }
81 
82  if( _prefix.empty() )
83  {
84  string err = "The cache file prefix was not specified, must be non-empty" ;
85  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
86  }
87 
88  if( _cache_size_in_megs <= 0 )
89  {
90  string err = "The cache size was not specified, must be non-zero" ;
91  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
92  }
93 
94  // If the user specifies a cache that is too large,
95  // it is a user exception and we should tell them.
96  // Actually, this may not work since by this
97  // time we may have already overflowed the variable...
98  if( _cache_size_in_megs > MAX_CACHE_SIZE_IN_MEGABYTES )
99  {
100  _cache_size_in_megs = MAX_CACHE_SIZE_IN_MEGABYTES ;
101  std::ostringstream msg;
102  msg << "The specified cache size was larger than the max cache size of: "
103  << MAX_CACHE_SIZE_IN_MEGABYTES;
104  throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__);
105  }
106 
107  BESDEBUG( "bes", "BES Cache: directory " << _cache_dir
108  << ", prefix " << _prefix
109  << ", max size " << _cache_size_in_megs << endl ) ;
110 }
111 
121 BESCache::BESCache( const string &cache_dir,
122  const string &prefix,
123  unsigned long long sizeInMegs )
124  : _cache_dir( cache_dir ),
125  _prefix( prefix ),
126  _cache_size_in_megs( sizeInMegs ),
127  _lock_fd( -1 )
128 {
129  check_ctor_params(); // Throws BESSyntaxUserError on error.
130 }
131 
146 BESCache::BESCache( BESKeys &keys,
147  const string &cache_dir_key,
148  const string &prefix_key,
149  const string &size_key )
150  : _cache_size_in_megs( 0 ),
151  _lock_fd( -1 )
152 {
153  bool found = false ;
154  keys.get_value( cache_dir_key, _cache_dir, found ) ;
155  if( !found )
156  {
157  string err = "The cache directory key " + cache_dir_key
158  + " was not found in the BES configuration file" ;
159  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
160  }
161 
162  found = false ;
163  keys.get_value( prefix_key, _prefix, found ) ;
164  if( !found )
165  {
166  string err = "The prefix key " + prefix_key
167  + " was not found in the BES configuration file" ;
168  throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
169  }
170 
171  found = false ;
172  string cache_size_str ;
173  keys.get_value( size_key, cache_size_str, found ) ;
174  if( !found )
175  {
176  string err = "The size key " + size_key
177  + " was not found in the BES configuration file" ;
178  throw BESInternalError( err, __FILE__, __LINE__ ) ;
179  }
180 
181  std::istringstream is( cache_size_str ) ;
182  is >> _cache_size_in_megs ;
183 
184  check_ctor_params(); // Throws BESSyntaxUserError on error.
185 }
186 
193 bool
194 BESCache::lock( unsigned int retry, unsigned int num_tries )
195 {
196  // make sure we aren't retrying too many times
197  if( num_tries > MAX_LOCK_TRIES )
198  num_tries = MAX_LOCK_TRIES ;
199  if( retry > MAX_LOCK_RETRY_MS )
200  retry = MAX_LOCK_RETRY_MS ;
201 
202  bool got_lock = true ;
203  if( _lock_fd == -1 )
204  {
205  string lock_file = _cache_dir + "/lock" ;
206  unsigned int tries = 0 ;
207  _lock_fd = open( lock_file.c_str(),
208  O_CREAT | O_EXCL,
209  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
210  while( _lock_fd < 0 && got_lock )
211  {
212  tries ++ ;
213  if( tries > num_tries )
214  {
215  _lock_fd = -1 ;
216  got_lock = false ;
217  }
218  else
219  {
220  usleep( retry ) ;
221  _lock_fd = open( lock_file.c_str(),
222  O_CREAT | O_EXCL,
223  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
224  }
225  }
226  }
227  else
228  {
229  // This would be a programming error, or we've gotten into a
230  // situation where the lock is lost. Lock has been called on the
231  // same cache object twice in a row without an unlock being called.
232  string err = "The cache dir " + _cache_dir + " is already locked" ;
233  throw BESInternalError( err, __FILE__, __LINE__ ) ;
234  }
235 
236  return got_lock ;
237 }
238 
245 bool
247 {
248  // if we call unlock twice in a row, does it matter? I say no, just say
249  // that it is unlocked.
250  bool unlocked = true ;
251  if( _lock_fd != -1 )
252  {
253  string lock_file = _cache_dir + "/lock" ;
254  close( _lock_fd ) ;
255  (void)unlink( lock_file.c_str() ) ;
256  }
257 
258  _lock_fd = -1 ;
259 
260  return unlocked ;
261 }
262 
276 bool
277 BESCache::is_cached( const string &src, string &target )
278 {
279  bool is_it = true ;
280  string tmp_target = src ;
281 
282  // Create the file that would be created in the cache directory
283  //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
284  if( tmp_target.at(0) == '/' )
285  {
286  tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
287  }
288  string::size_type slash = 0 ;
289  while( ( slash = tmp_target.find( '/' ) ) != string::npos )
290  {
291  tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
292  }
293  string::size_type last_dot = tmp_target.rfind( '.' ) ;
294  if( last_dot != string::npos )
295  {
296  tmp_target = tmp_target.substr( 0, last_dot ) ;
297  }
298 
299  target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
300 
301  // Determine if the target file is already in the cache or not
302  struct stat buf;
303  int statret = stat( target.c_str(), &buf ) ;
304  if( statret != 0 )
305  {
306  is_it = false ;
307  }
308 
309  return is_it ;
310 }
311 
312 
313 
314 
323 void
325 {
326  // Fill in contents and get the info
327  CacheDirInfo cd_info;
328  collect_cache_dir_info(cd_info);
329  unsigned long long avg_size = cd_info.get_avg_size();
330 
331  // These are references in the refactor, probably would make
332  // sense to add these calls below to the info, but...
333  unsigned long long& size = cd_info._total_cache_files_size;
334  unsigned long long& num_files_in_cache = cd_info._num_files_in_cache;
335  BESCache::CacheFilesByAgeMap& contents = cd_info._contents;
336 
337  BESDEBUG( "bes", "cache size = " << size << endl ) ;
338  BESDEBUG( "bes", "avg size = " << avg_size << endl ) ;
339  BESDEBUG( "bes", "num files in cache = "
340  << num_files_in_cache << endl ) ;
341  if( BESISDEBUG( "bes" ) )
342  {
343  BESDEBUG( "bes", endl << "BEFORE" << endl ) ;
344  CacheFilesByAgeMap::iterator ti = contents.begin() ;
345  CacheFilesByAgeMap::iterator te = contents.end() ;
346  for( ; ti != te; ti++ )
347  {
348  BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
349  }
350  BESDEBUG( "bes", endl ) ;
351  }
352 
353 
354  // if the size of files is greater than max allowed then we need to
355  // purge the cache directory. Keep going until the size is less than
356  // the max.
357  // [Maybe change this to size + (fraction of max_size) > max_size?
358  // jhrg 5/9/07]
359  unsigned long long max_size_in_bytes = _cache_size_in_megs * BYTES_PER_MEG ; // Bytes/Meg
360  while( (size+avg_size) > max_size_in_bytes )
361  {
362  // Grab the first which is the oldest
363  // in terms of access time.
364  CacheFilesByAgeMap::iterator i = contents.begin() ;
365 
366  // if we've deleted all entries, exit the loop
367  if( i == contents.end() )
368  {
369  break;
370  }
371 
372  // Otherwise, remove the file with unlink
373  BESDEBUG( "bes", "BESCache::purge - removing "
374  << (*i).second.name << endl ) ;
375  // unlink rather than remove in case the file is in use
376  // by a forked BES process
377  if( unlink( (*i).second.name.c_str() ) != 0 )
378  {
379  char *s_err = strerror( errno ) ;
380  string err = "Unable to remove the file "
381  + (*i).second.name
382  + " from the cache: " ;
383  if( s_err )
384  {
385  err.append( s_err ) ;
386  }
387  else
388  {
389  err.append( "Unknown error" ) ;
390  }
391  throw BESInternalError( err, __FILE__, __LINE__ ) ;
392  }
393 
394  size -= (*i).second.size ;
395  contents.erase( i ) ;
396  }
397 
398  if( BESISDEBUG( "bes" ) )
399  {
400  BESDEBUG( "bes", endl << "AFTER" << endl ) ;
401  CacheFilesByAgeMap::iterator ti = contents.begin() ;
402  CacheFilesByAgeMap::iterator te = contents.end() ;
403  for( ; ti != te; ti++ )
404  {
405  BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ;
406  }
407  }
408 }
409 
// Local helper that owns the DIR* returned by opendir() and hands it to
// closedir() in its destructor, so the directory is closed even when an
// exception leaves the scope.
//
// Exactly one DIR_Wrapper owns a given handle at any time: copying moves the
// handle into the new object and leaves the source empty. Without this, the
// compiler-generated copy would share the pointer and closedir() would be
// called twice on it. Assignment is disabled.
struct DIR_Wrapper
{
    // Open dir_name. If opendir() fails, get() returns NULL.
    explicit DIR_Wrapper(const std::string& dir_name)
        : _dip(opendir(dir_name.c_str()))
    {
    }

    // Take over the handle held by other; other no longer owns anything.
    DIR_Wrapper(const DIR_Wrapper& other)
        : _dip(other._dip)
    {
        other._dip = NULL;
    }

    ~DIR_Wrapper()
    {
        close();
    }

    // The wrapped handle, or NULL if the open failed, the handle was closed,
    // or ownership was passed to a copy.
    DIR* get() const { return _dip; }

    // Close the directory if it is open. Safe to call more than once.
    void close()
    {
        if (_dip)
        {
            closedir(_dip);
            _dip = NULL;
        }
    }

    // data rep; mutable so that the copy constructor can empty its source
    mutable DIR* _dip;

private:
    // Not implemented: assigning would need to close one handle and take
    // over another, and nothing in this file needs that.
    DIR_Wrapper& operator=(const DIR_Wrapper&);
};
438 
439 void
441  BESCache::CacheDirInfo& cd_info // output
442  ) const
443 {
444  // start fresh
445  cd_info.clear();
446 
447  time_t curr_time = time( NULL ) ; // grab the current time so we can
448  // determine the oldest file
449 
450  DIR_Wrapper dip = DIR_Wrapper( _cache_dir );
451  if (! (dip.get()) )
452  {
453  string err = "Unable to open cache directory " + _cache_dir ;
454  throw BESInternalError( err, __FILE__, __LINE__ ) ;
455  }
456  else // got a dir entry so count up the cached files
457  {
458  struct stat buf;
459  struct dirent *dit;
460  // go through the cache directory and collect all of the files that
461  // start with the matching prefix
462  while( ( dit = readdir( dip.get() ) ) != NULL )
463  {
464  string dirEntry = dit->d_name ;
465  if( dirEntry.compare( 0, _prefix.length(), _prefix ) == 0)
466  {
467  // Now that we have found a match we want to get the size of
468  // the file and the last access time from the file.
469  string fullPath = _cache_dir + "/" + dirEntry ;
470  int statret = stat( fullPath.c_str(), &buf ) ;
471  if( statret == 0 )
472  {
473  cd_info._total_cache_files_size += buf.st_size ;
474 
475  // Find out how old the file is
476  time_t file_time = buf.st_atime ;
477 
478  // I think we can use the access time without the diff,
479  // since it's the relative ages that determine when to
480  // delete a file. Good idea to use the access time so
481  // recently used (read) files will linger. jhrg 5/9/07
482  double time_diff = difftime( curr_time, file_time ) ;
483  cache_entry entry ;
484  entry.name = fullPath ;
485  entry.size = buf.st_size ;
486  cd_info._contents.insert( pair<double, cache_entry>( time_diff, entry ) );
487  }
488  cd_info._num_files_in_cache++ ;
489  }
490  }
491  }
492 
493  dip.close();
494 }
495 
503 void
504 BESCache::dump( ostream &strm ) const
505 {
506  strm << BESIndent::LMarg << "BESCache::dump - ("
507  << (void *)this << ")" << endl ;
509  strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
510  strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
511  strm << BESIndent::LMarg << "size (mb): " << _cache_size_in_megs << endl ;
513 }
514 
#define BESISDEBUG(x)
macro used to determine if the specified debug context is set
Definition: BESDebug.h:83
exception thrown if internal error encountered
virtual bool lock(unsigned int retry_ms, unsigned int num_tries)
lock the cache using a file lock
Definition: BESCache.cc:194
virtual void purge()
Check to see if the cache size exceeds the size specified in the constructor and purge older files un...
Definition: BESCache.cc:324
BESCache::CacheFilesByAgeMap _contents
Definition: BESCache.h:111
virtual void dump(ostream &strm) const
dumps information about this object
Definition: BESCache.cc:504
unsigned long long _total_cache_files_size
Definition: BESCache.h:109
std::multimap< double, cache_entry, std::greater< double > > CacheFilesByAgeMap
Sugar for the multimap of entries sorted with older files first.
Definition: BESCache.h:71
for filename -> filesize map below
Definition: BESCache.h:63
static void Indent()
Definition: BESIndent.cc:38
error thrown if there is a user syntax error in the request or any other user error ...
Helper class for info on the cache directory.
Definition: BESCache.h:74
unsigned long long get_avg_size() const
Definition: BESCache.h:94
void collect_cache_dir_info(BESCache::CacheDirInfo &cd_info) const
Definition: BESCache.cc:440
mapping of key/value pairs defining different behaviors of an application.
Definition: BESKeys.h:84
unsigned long long _num_files_in_cache
Definition: BESCache.h:110
static ostream & LMarg(ostream &strm)
Definition: BESIndent.cc:73
virtual bool unlock()
unlock the cache
Definition: BESCache.cc:246
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:453
virtual bool is_cached(const string &src, string &target)
Determine if the file specified by src is cached.
Definition: BESCache.cc:277
unsigned long long size
Definition: BESCache.h:66
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
static void UnIndent()
Definition: BESIndent.cc:44
string name
Definition: BESCache.h:65