00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028
00029
00030
00031
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053
00054 #include "util_mit.h"
00055 #include "debug.h"
00056
// Thin portability layer: the same macro names expand to the Windows CRT
// call or to the POSIX call, so the rest of the file needs few #ifdefs.
#ifdef WIN32
#include <direct.h>
#include <time.h>
#include <fcntl.h>
#define MKDIR(a,b) _mkdir((a))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
#define DIR_SEPARATOR_CHAR '\\'
#define DIR_SEPARATOR_STR "\\"
#else
#define MKDIR(a,b) mkdir((a), (b))
#define REMOVE(a) remove((a))
#define MKSTEMP(a) mkstemp((a))
#define DIR_SEPARATOR_CHAR '/'
#define DIR_SEPARATOR_STR "/"
#endif

// Suffix appended to a cached body's file name to name its companion file.
#define CACHE_META ".meta"
// Suffix appended to the cache root to name the persistent index file.
#define CACHE_INDEX ".index"
// Placeholder written to the index in place of an empty ETag, so that every
// index line keeps the same number of whitespace-separated fields.
#define CACHE_EMPTY_ETAG "@cache@"

// Both values are parenthesized so they behave as single operands wherever
// they are expanded (e.g. `x / NO_LM_EXPIRATION`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM

// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif

// Number of slots in the hash table; get_hash() reduces modulo this value.
const int CACHE_TABLE_SIZE = 1499;
00088
00089 using namespace std;
00090
00091 namespace libdap {
00092
00096 int
00097 get_hash(const string &url)
00098 {
00099 int hash = 0;
00100
00101 for (const char *ptr = url.c_str(); *ptr; ptr++)
00102 hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
00103
00104 return hash;
00105 }
00106
00107 HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
00108 d_cache_root(cache_root),
00109 d_block_size(block_size),
00110 d_current_size(0),
00111 d_new_entries(0)
00112 {
00113 d_cache_index = cache_root + CACHE_INDEX;
00114
00115 d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
00116
00117 // Initialize the cache table.
00118 for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
00119 d_cache_table[i] = 0;
00120
00121 cache_index_read();
00122 }
00123
00127 static inline void
00128 delete_cache_entry(HTTPCacheTable::CacheEntry *e)
00129 {
00130 DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00131 #if 0
00132 DESTROY(&e->get_lock());
00133 #endif
00134 delete e;
00135 }
00136
00137 HTTPCacheTable::~HTTPCacheTable() {
00138 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00139 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
00140 if (cp) {
00141
00142 for_each(cp->begin(), cp->end(), delete_cache_entry);
00143
00144
00145 delete get_cache_table()[i];
00146 get_cache_table()[i] = 0;
00147 }
00148 }
00149
00150 delete[] d_cache_table;
00151 }
00152
00160 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00161 time_t d_time;
00162 HTTPCacheTable &d_table;
00163
00164 public:
00165 DeleteExpired(HTTPCacheTable &table, time_t t) :
00166 d_time(t), d_table(table) {
00167 if (!t)
00168 d_time = time(0);
00169 }
00170
00171 void operator()(HTTPCacheTable::CacheEntry *&e) {
00172 if (e && !e->readers && (e->freshness_lifetime
00173 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00174 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00175 d_table.remove_cache_entry(e);
00176 delete e; e = 0;
00177 }
00178 }
00179 };
00180
00181
00182 void HTTPCacheTable::delete_expired_entries(time_t time) {
00183
00184 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00185 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00186 if (slot) {
00187 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
00188 slot->erase(remove(slot->begin(), slot->end(),
00189 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
00190 }
00191 }
00192 }
00193
00200 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00201 HTTPCacheTable &d_table;
00202 int d_hits;
00203
00204 public:
00205 DeleteByHits(HTTPCacheTable &table, int hits) :
00206 d_table(table), d_hits(hits) {
00207 }
00208
00209 void operator()(HTTPCacheTable::CacheEntry *&e) {
00210 if (e && !e->readers && e->hits <= d_hits) {
00211 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00212 d_table.remove_cache_entry(e);
00213 delete e; e = 0;
00214 }
00215 }
00216 };
00217
00218 void
00219 HTTPCacheTable::delete_by_hits(int hits) {
00220 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00221 if (get_cache_table()[cnt]) {
00222 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00223 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
00224 slot->erase(remove(slot->begin(), slot->end(),
00225 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00226 slot->end());
00227
00228 }
00229 }
00230 }
00231
00236 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00237 HTTPCacheTable &d_table;
00238 unsigned int d_size;
00239
00240 public:
00241 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
00242 d_table(table), d_size(size) {
00243 }
00244
00245 void operator()(HTTPCacheTable::CacheEntry *&e) {
00246 if (e && !e->readers && e->size > d_size) {
00247 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00248 d_table.remove_cache_entry(e);
00249 delete e; e = 0;
00250 }
00251 }
00252 };
00253
00254 void HTTPCacheTable::delete_by_size(unsigned int size) {
00255 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00256 if (get_cache_table()[cnt]) {
00257 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00258 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
00259 slot->erase(remove(slot->begin(), slot->end(),
00260 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00261 slot->end());
00262
00263 }
00264 }
00265 }
00266
00273
00280 bool
00281 HTTPCacheTable::cache_index_delete()
00282 {
00283 d_new_entries = 0;
00284
00285 return (REMOVE(d_cache_index.c_str()) == 0);
00286 }
00287
00296 bool
00297 HTTPCacheTable::cache_index_read()
00298 {
00299 FILE *fp = fopen(d_cache_index.c_str(), "r");
00300
00301
00302 if (!fp) {
00303 return false;
00304 }
00305
00306 char line[1024];
00307 while (!feof(fp) && fgets(line, 1024, fp)) {
00308 add_entry_to_cache_table(cache_index_parse_line(line));
00309 DBG2(cerr << line << endl);
00310 }
00311
00312 int res = fclose(fp) ;
00313 if (res) {
00314 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
00315 }
00316
00317 d_new_entries = 0;
00318
00319 return true;
00320 }
00321
00329 HTTPCacheTable::CacheEntry *
00330 HTTPCacheTable::cache_index_parse_line(const char *line)
00331 {
00332
00333 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
00334 #if 0
00335 INIT(&entry->d_lock);
00336 #endif
00337 istringstream iss(line);
00338 iss >> entry->url;
00339 iss >> entry->cachename;
00340
00341 iss >> entry->etag;
00342 if (entry->etag == CACHE_EMPTY_ETAG)
00343 entry->etag = "";
00344
00345 iss >> entry->lm;
00346 iss >> entry->expires;
00347 iss >> entry->size;
00348 iss >> entry->range;
00349
00350 iss >> entry->hash;
00351 iss >> entry->hits;
00352 iss >> entry->freshness_lifetime;
00353 iss >> entry->response_time;
00354 iss >> entry->corrected_initial_age;
00355
00356 iss >> entry->must_revalidate;
00357
00358 return entry;
00359 }
00360
00363 class WriteOneCacheEntry :
00364 public unary_function<HTTPCacheTable::CacheEntry *, void>
00365 {
00366
00367 FILE *d_fp;
00368
00369 public:
00370 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00371 {}
00372
00373 void operator()(HTTPCacheTable::CacheEntry *e)
00374 {
00375 if (e && fprintf(d_fp,
00376 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00377 e->url.c_str(),
00378 e->cachename.c_str(),
00379 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
00380 (long)(e->lm),
00381 (long)(e->expires),
00382 e->size,
00383 e->range ? '1' : '0',
00384 e->hash,
00385 e->hits,
00386 (long)(e->freshness_lifetime),
00387 (long)(e->response_time),
00388 (long)(e->corrected_initial_age),
00389 e->must_revalidate ? '1' : '0') < 0)
00390 throw Error("Cache Index. Error writing cache index\n");
00391 }
00392 };
00393
00403 void
00404 HTTPCacheTable::cache_index_write()
00405 {
00406 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00407
00408
00409 FILE * fp = NULL;
00410 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00411 throw Error(string("Cache Index. Can't open `") + d_cache_index
00412 + string("' for writing"));
00413 }
00414
00415
00416
00417
00418 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00419 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
00420 if (cp)
00421 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00422 }
00423
00424
00425 int res = fclose(fp);
00426 if (res) {
00427 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00428 << (void *)fp << endl);
00429 }
00430
00431 d_new_entries = 0;
00432 }
00433
00435
00448 string
00449 HTTPCacheTable::create_hash_directory(int hash)
00450 {
00451 struct stat stat_info;
00452 ostringstream path;
00453
00454 path << d_cache_root << hash;
00455 string p = path.str();
00456
00457 if (stat(p.c_str(), &stat_info) == -1) {
00458 DBG2(cerr << "Cache....... Create dir " << p << endl);
00459 if (MKDIR(p.c_str(), 0777) < 0) {
00460 DBG2(cerr << "Cache....... Can't create..." << endl);
00461 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
00462 }
00463 }
00464 else {
00465 DBG2(cerr << "Cache....... Directory " << p << " already exists"
00466 << endl);
00467 }
00468
00469 return p;
00470 }
00471
00486 void
00487 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
00488 {
00489 string hash_dir = create_hash_directory(entry->hash);
00490 #ifdef WIN32
00491 hash_dir += "\\dodsXXXXXX";
00492 #else
00493 hash_dir += "/dodsXXXXXX";
00494 #endif
00495
00496
00497 char *templat = new char[hash_dir.size() + 1];
00498 strcpy(templat, hash_dir.c_str());
00499
00500
00501
00502
00503
00504 int fd = MKSTEMP(templat);
00505 if (fd < 0) {
00506 delete[] templat; templat = 0;
00507 close(fd);
00508 throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
00509 }
00510
00511 entry->cachename = templat;
00512 delete[] templat; templat = 0;
00513 close(fd);
00514 }
00515
00516
00518 static inline int
00519 entry_disk_space(int size, unsigned int block_size)
00520 {
00521 unsigned int num_of_blocks = (size + block_size) / block_size;
00522
00523 DBG(cerr << "size: " << size << ", block_size: " << block_size
00524 << ", num_of_blocks: " << num_of_blocks << endl);
00525
00526 return num_of_blocks * block_size;
00527 }
00528
00532
00538 void
00539 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
00540 {
00541 int hash = entry->hash;
00542
00543 if (!d_cache_table[hash])
00544 d_cache_table[hash] = new CacheEntries;
00545
00546 d_cache_table[hash]->push_back(entry);
00547
00548 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00549 << ", entry->size: " << entry->size << ", block size: " << d_block_size
00550 << endl);
00551
00552 d_current_size += entry_disk_space(entry->size, d_block_size);
00553
00554 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00555
00556 increment_new_entries();
00557 }
00558
00562 HTTPCacheTable::CacheEntry *
00563 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url)
00564 {
00565 return get_locked_entry_from_cache_table(get_hash(url), url);
00566 }
00567
00575 HTTPCacheTable::CacheEntry *
00576 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url)
00577 {
00578 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
00579 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
00580 if (d_cache_table[hash]) {
00581 CacheEntries *cp = d_cache_table[hash];
00582 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00583
00584
00585 if ((*i) && (*i)->url == url) {
00586 (*i)->lock_read_response();
00587 #if 0
00588 (*i)->lock();
00589 #endif
00590 return *i;
00591 }
00592 }
00593 }
00594
00595 return 0;
00596 }
00597
00605 HTTPCacheTable::CacheEntry *
00606 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
00607 {
00608 int hash = get_hash(url);
00609 if (d_cache_table[hash]) {
00610 CacheEntries *cp = d_cache_table[hash];
00611 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00612
00613
00614 if ((*i) && (*i)->url == url) {
00615 (*i)->lock_write_response();
00616 #if 0
00617 (*i)->lock();
00618 #endif
00619 return *i;
00620 }
00621 }
00622 }
00623
00624 return 0;
00625 }
00626
00634 void
00635 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
00636 {
00637
00638
00639 if (entry->readers)
00640 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
00641
00642 REMOVE(entry->cachename.c_str());
00643 REMOVE(string(entry->cachename + CACHE_META).c_str());
00644
00645 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00646
00647 unsigned int eds = entry_disk_space(entry->size, get_block_size());
00648 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
00649
00650 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00651 }
00652
00655 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
00656 {
00657 string d_url;
00658 HTTPCacheTable *d_cache_table;
00659
00660 public:
00661 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
00662 : d_url(url), d_cache_table(c)
00663 {}
00664
00665 void operator()(HTTPCacheTable::CacheEntry *&e)
00666 {
00667 if (e && e->url == d_url) {
00668 e->lock_write_response();
00669 d_cache_table->remove_cache_entry(e);
00670 e->unlock_write_response();
00671 delete e; e = 0;
00672 }
00673 }
00674 };
00675
00682 void
00683 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
00684 {
00685 int hash = get_hash(url);
00686 if (d_cache_table[hash]) {
00687 CacheEntries *cp = d_cache_table[hash];
00688 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00689 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
00690 cp->end());
00691 }
00692 }
00693
00696 class DeleteUnlockedCacheEntry :
00697 public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00698 HTTPCacheTable &d_table;
00699
00700 public:
00701 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
00702 d_table(t) {
00703 }
00704 void operator()(HTTPCacheTable::CacheEntry *&e) {
00705 if (e) {
00706 d_table.remove_cache_entry(e);
00707 delete e; e = 0;
00708 }
00709 }
00710 };
00711
00712 void HTTPCacheTable::delete_all_entries() {
00713
00714
00715 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00716 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00717 if (slot) {
00718 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
00719 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)),
00720 slot->end());
00721 }
00722 }
00723
00724 cache_index_delete();
00725 }
00726
00738 void
00739 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
00740 {
00741 entry->response_time = time(NULL);
00742 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
00743 time_t corrected_received_age = max(apparent_age, entry->age);
00744 time_t response_delay = entry->response_time - request_time;
00745 entry->corrected_initial_age = corrected_received_age + response_delay;
00746
00747
00748
00749
00750 time_t freshness_lifetime = entry->max_age;
00751 if (freshness_lifetime < 0) {
00752 if (entry->expires < 0) {
00753 if (entry->lm < 0) {
00754 freshness_lifetime = default_expiration;
00755 }
00756 else {
00757 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
00758 }
00759 }
00760 else
00761 freshness_lifetime = entry->expires - entry->date;
00762 }
00763
00764 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
00765
00766 DBG2(cerr << "Cache....... Received Age " << entry->age
00767 << ", corrected " << entry->corrected_initial_age
00768 << ", freshness lifetime " << entry->freshness_lifetime << endl);
00769 }
00770
00781 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry,
00782 unsigned long max_entry_size, const vector<string> &headers) {
00783 vector<string>::const_iterator i;
00784 for (i = headers.begin(); i != headers.end(); ++i) {
00785
00786 if ((*i).empty())
00787 continue;
00788
00789 string::size_type colon = (*i).find(':');
00790
00791
00792 if (colon == string::npos)
00793 continue;
00794
00795 string header = (*i).substr(0, (*i).find(':'));
00796 string value = (*i).substr((*i).find(": ") + 2);
00797 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
00798
00799 if (header == "ETag") {
00800 entry->etag = value;
00801 } else if (header == "Last-Modified") {
00802 entry->lm = parse_time(value.c_str());
00803 } else if (header == "Expires") {
00804 entry->expires = parse_time(value.c_str());
00805 } else if (header == "Date") {
00806 entry->date = parse_time(value.c_str());
00807 } else if (header == "Age") {
00808 entry->age = parse_time(value.c_str());
00809 } else if (header == "Content-Length") {
00810 unsigned long clength = strtoul(value.c_str(), 0, 0);
00811 if (clength > max_entry_size)
00812 entry->set_no_cache(true);
00813 } else if (header == "Cache-Control") {
00814
00815
00816
00817 if (value == "no-cache" || value == "no-store")
00818
00819
00820
00821 entry->set_no_cache(true);
00822 else if (value == "must-revalidate")
00823 entry->must_revalidate = true;
00824 else if (value.find("max-age") != string::npos) {
00825 string max_age = value.substr(value.find("=" + 1));
00826 entry->max_age = parse_time(max_age.c_str());
00827 }
00828 }
00829 }
00830 }
00831
00833
00834
00835 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
00836 entry->hits++;
00837 d_locked_entries[body] = entry;
00838 #if 0
00839 entry->unlock();
00840 #endif
00841 }
00842
00843 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
00844 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
00845 if (!entry)
00846 throw InternalErr("There is no cache entry for the response given.");
00847
00848 d_locked_entries.erase(body);
00849 entry->unlock_read_response();
00850
00851 if (entry->readers < 0)
00852 throw InternalErr("An unlocked entry was released");
00853 }
00854
00855 bool HTTPCacheTable::is_locked_read_responses() {
00856 return !d_locked_entries.empty();
00857 }
00858
00859 }