/* mod_cache_disk.c -- disk-based HTTP cache storage provider for Apache httpd */
/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
*/
/* * mod_cache_disk: Disk Based HTTP 1.1 Cache. * * Flow to Find the .data file: * Incoming client requests URI /foo/bar/baz * Generate <hash> off of /foo/bar/baz * Open <hash>.header * Read in <hash>.header file (may contain Format #1 or Format #2) * If format #1 (Contains a list of Vary Headers): * Use each header name (from .header) with our request values (headers_in) to * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz * re-read in <hash>.header (must be format #2) * read in <hash>.data * * Format #1: * apr_uint32_t format; * apr_time_t expire; * apr_array_t vary_headers (delimited by CRLF) * * Format #2: * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format) * entity name (dobj->name) [length is in disk_cache_info_t->name_len] * r->headers_out (delimited by CRLF) * CRLF * r->headers_in (delimited by CRLF) * CRLF
*/
/* htcacheclean may remove directories underneath us.
 * So, we'll try renaming three times at a cost of 0.002 seconds.
 */
static apr_status_t safe_file_rename(disk_cache_conf *conf,
                                     const char *src, const char *dest,
                                     apr_pool_t *pool)
{
    apr_status_t rv;

    /* First attempt: the common case where the destination tree exists. */
    rv = apr_file_rename(src, dest, pool);

    if (rv != APR_SUCCESS) {
        int i;

        for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
            /* 1000 micro-seconds aka 0.001 seconds. */
            apr_sleep(1000);

            /* htcacheclean may have removed the parent directories;
             * recreate them before retrying the rename.
             */
            rv = mkdir_structure(conf, dest, pool);
            if (rv != APR_SUCCESS)
                continue;

            rv = apr_file_rename(src, dest, pool);
        }
    }

    return rv;
}
/* These two functions get and put state information into the data
 * file for an ap_cache_el, this state information will be read
 * and written transparent to clients of this module
 */
static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
                                    disk_cache_object_t *dobj,
                                    request_rec *r)
{
    apr_status_t rv;
    char *urlbuff;
    apr_size_t len;

    /* read the fixed-size disk_cache_info_t header from the cache file */
    len = sizeof(disk_cache_info_t);
    rv = apr_file_read_full(fd, &dobj->disk_info, len, &len);
    if (rv != APR_SUCCESS) {
        return rv;
    }

    /* Store it away so we can get it later. */
    info->status = dobj->disk_info.status;
    info->date = dobj->disk_info.date;
    info->expire = dobj->disk_info.expire;
    info->request_time = dobj->disk_info.request_time;
    info->response_time = dobj->disk_info.response_time;

    /* Note that we could optimize this by conditionally doing the palloc
     * depending upon the size. */
    urlbuff = apr_palloc(r->pool, dobj->disk_info.name_len + 1);
    len = dobj->disk_info.name_len;
    rv = apr_file_read_full(fd, urlbuff, len, &len);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    urlbuff[dobj->disk_info.name_len] = '\0';

    /* check that we have the same URL */
    /* Would strncmp be correct? */
    if (strcmp(urlbuff, dobj->name) != 0) {
        return APR_EGENERAL;
    }

    return APR_SUCCESS;
}
staticconstchar* regen_key(apr_pool_t *p, apr_table_t *headers,
apr_array_header_t *varray, constchar *oldkey)
{ struct iovec *iov; int i, k; int nvec; constchar *header; constchar **elts;
/* TODO: * - Handle multiple-value headers better. (sort them?) * - Handle Case in-sensitive Values better. * This isn't the end of the world, since it just lowers the cache * hit rate, but it would be nice to fix. * * The majority are case insenstive if they are values (encoding etc). * Most of rfc2616 is case insensitive on header contents. * * So the better solution may be to identify headers which should be * treated case-sensitive? * HTTP URI's (3.2.3) [host and scheme are insensitive] * HTTP method (5.1.1) * HTTP-date values (3.3.1) * 3.7 Media Types [excerpt] * The type, subtype, and parameter attribute names are case- * insensitive. Parameter values might or might not be case-sensitive, * depending on the semantics of the parameter name. * 4.20 Except [excerpt] * Comparison of expectation values is case-insensitive for unquoted * tokens (including the 100-continue token), and is case-sensitive for * quoted-string expectation-extensions.
*/
/* Look up entity keyed to 'url' */ if (conf->cache_root == NULL) { if (!error_logged) {
error_logged = 1;
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00703) "Cannot cache files to disk without a CacheRoot specified.");
} return DECLINED;
}
/* Create and init the cache object */
obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
info = &(obj->info);
/* Open the headers file */
dobj->prefix = NULL;
/* Save the cache root */
dobj->root = apr_pstrmemdup(r->pool, conf->cache_root, conf->cache_root_len);
dobj->root_len = conf->cache_root_len;
/* oops, not vary as it turns out */
dobj->hdrs.fd = dobj->vary.fd;
dobj->vary.fd = NULL;
dobj->hdrs.file = dobj->vary.file;
/* This wasn't a Vary Format file, so we must seek to the * start of the file again, so that later reads work.
*/
apr_file_seek(dobj->hdrs.fd, APR_SET, &offset);
nkey = key;
}
/* Read the bytes to setup the cache_info fields */
rc = file_cache_recall_mydata(dobj->hdrs.fd, info, dobj, r); if (rc != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00706) "Cannot read header file %s", dobj->hdrs.file);
apr_file_close(dobj->hdrs.fd); return DECLINED;
}
/* Is this a cached HEAD request? */ if (dobj->disk_info.header_only && !r->header_only) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00707) "HEAD request cached, non-HEAD requested, ignoring: %s",
dobj->hdrs.file);
apr_file_close(dobj->hdrs.fd); return DECLINED;
}
/* Open the data file */ if (dobj->disk_info.has_body) {
flags = APR_READ | APR_BINARY; #ifdef APR_SENDFILE_ENABLED /* When we are in the quick handler we don't have the per-directory * configuration, so this check only takes the global setting of * the EnableSendFile directive into account.
*/
flags |= AP_SENDFILE_ENABLED(coreconf->enable_sendfile); #endif
rc = apr_file_open(&dobj->data.fd, dobj->data.file, flags, 0, r->pool); if (rc != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00708) "Cannot open data file %s", dobj->data.file);
apr_file_close(dobj->hdrs.fd); return DECLINED;
}
/* Atomic check - does the body file belong to the header file? */ if (dobj->disk_info.inode == finfo.inode &&
dobj->disk_info.device == finfo.device) {
/* Initialize the cache_handle callback functions */
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00709) "Recalled cached URL info header %s", dobj->name);
/* make the configuration stick */
h->cache_obj = obj;
obj->vobj = dobj;
return OK;
}
} else {
/* make the configuration stick */
h->cache_obj = obj;
obj->vobj = dobj;
return OK;
}
/* Oh dear, no luck matching header to the body */
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00710) "Cached URL info header '%s' didn't match body, ignoring this entry",
dobj->name);
/* Get disk cache object from cache handle */
dobj = (disk_cache_object_t *) h->cache_obj->vobj; if (!dobj) { return DECLINED;
}
/* Delete headers file */ if (dobj->hdrs.file) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00711) "Deleting %s from cache.", dobj->hdrs.file);
rc = apr_file_remove(dobj->hdrs.file, r->pool); if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) { /* Will only result in an output if httpd is started with -e debug. * For reason see log_error_core for the case s == NULL.
*/
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r, APLOGNO(00712) "Failed to delete headers file %s from cache.",
dobj->hdrs.file); return DECLINED;
}
}
/* Delete data file */ if (dobj->data.file) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00713) "Deleting %s from cache.", dobj->data.file);
rc = apr_file_remove(dobj->data.file, r->pool); if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) { /* Will only result in an output if httpd is started with -e debug. * For reason see log_error_core for the case s == NULL.
*/
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r, APLOGNO(00714) "Failed to delete data file %s from cache.",
dobj->data.file); return DECLINED;
}
}
/* now delete directories as far as possible up to our cache root */ if (dobj->root) { constchar *str_to_copy;
/* * now walk our way back to the cache root, delete everything * in the way as far as possible * * Note: due to the way we constructed the file names in * header_file and data_file, we are guaranteed that the * cache_root is suffixed by at least one '/' which will be * turned into a terminating null by this loop. Therefore, * we won't either delete or go above our cache root.
*/ for (q = dir + dobj->root_len; *q ; ) {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00715) "Deleting directory %s from cache", dir);
/* ### What about APR_EOF? */
rv = apr_file_gets(w, MAX_STRING_LEN - 1, file); if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(00717) "Premature end of cache headers."); return rv;
}
/* Delete terminal (CR?)LF */
p = strlen(w); /* Indeed, the host's '\n': '\012' for UNIX; '\015' for MacOS; '\025' for OS/390 -- whatever the script generates.
*/ if (p > 0 && w[p - 1] == '\n') { if (p > 1 && w[p - 2] == CR) {
w[p - 2] = '\0';
} else {
w[p - 1] = '\0';
}
}
/* If we've finished reading the headers, break out of the loop. */ if (w[0] == '\0') { break;
}
#if APR_CHARSET_EBCDIC /* Chances are that we received an ASCII header text instead of * the expected EBCDIC header lines. Try to auto-detect:
*/ if (!(l = strchr(w, ':'))) { int maybeASCII = 0, maybeEBCDIC = 0; unsignedchar *cp, native;
apr_size_t inbytes_left, outbytes_left;
/* if we see a bogus header don't ignore it. Shout and scream */ if (!(l = strchr(w, ':'))) { return APR_EGENERAL;
}
*l++ = '\0'; while (apr_isspace(*l)) {
++l;
}
apr_table_add(table, w, l);
}
return APR_SUCCESS;
}
/* * Reads headers from a buffer and returns an array of headers. * Returns NULL on file error * This routine tries to deal with too long lines and continuation lines. * @@@: XXX: FIXME: currently the headers are passed thru un-merged. * Is that okay, or should they be collapsed where possible?
*/ static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
{
disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
apr_status_t rv;
/* This case should not happen... */ if (!dobj->hdrs.fd) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00719) "recalling headers; but no header fd for %s", dobj->name); return APR_NOTFOUND;
}
if (tmp) {
apr_array_header_t* varray;
apr_uint32_t format = VARY_FORMAT_VERSION;
/* If we were initially opened as a vary format, rollback * that internal state for the moment so we can recreate the * vary format hints in the appropriate directory.
*/ if (dobj->prefix) {
dobj->hdrs.file = dobj->prefix;
dobj->prefix = NULL;
}
rv = apr_file_writev_full(dobj->hdrs.tempfd, (conststruct iovec *) &iov,
2, &amt); if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00726) "could not write info to header file %s",
dobj->hdrs.tempfile);
apr_file_close(dobj->hdrs.tempfd);
apr_pool_destroy(dobj->hdrs.pool); return rv;
}
if (dobj->headers_out) {
rv = store_table(dobj->hdrs.tempfd, dobj->headers_out); if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00727) "could not write out-headers to header file %s",
dobj->hdrs.tempfile);
apr_file_close(dobj->hdrs.tempfd);
apr_pool_destroy(dobj->hdrs.pool); return rv;
}
}
/* Parse the vary header and dump those fields from the headers_in. */ /* FIXME: Make call to the same thing cache_select calls to crack Vary. */ if (dobj->headers_in) {
rv = store_table(dobj->hdrs.tempfd, dobj->headers_in); if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00728) "could not write in-headers to header file %s",
dobj->hdrs.tempfile);
apr_file_close(dobj->hdrs.tempfd);
apr_pool_destroy(dobj->hdrs.pool); return rv;
}
}
rv = apr_file_close(dobj->hdrs.tempfd); /* flush and close */ if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00729) "could not close header file %s",
dobj->hdrs.tempfile);
apr_pool_destroy(dobj->hdrs.pool); return rv;
}
/* are we done completely? if so, pass any trailing buckets right through */ if (dobj->done || !dobj->data.pool) {
APR_BUCKET_REMOVE(e);
APR_BRIGADE_INSERT_TAIL(out, e); continue;
}
/* have we seen eos yet? */ if (APR_BUCKET_IS_EOS(e)) {
seen_eos = 1;
dobj->done = 1;
APR_BUCKET_REMOVE(e);
APR_BRIGADE_INSERT_TAIL(out, e); break;
}
/* honour flush buckets, we'll get called again */ if (APR_BUCKET_IS_FLUSH(e)) {
APR_BUCKET_REMOVE(e);
APR_BRIGADE_INSERT_TAIL(out, e); break;
}
/* metadata buckets are preserved as is */ if (APR_BUCKET_IS_METADATA(e)) {
APR_BUCKET_REMOVE(e);
APR_BRIGADE_INSERT_TAIL(out, e); continue;
}
/* read the bucket, write to the cache */
rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
APR_BUCKET_REMOVE(e);
APR_BRIGADE_INSERT_TAIL(out, e); if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00730) "Error when reading bucket for URL %s",
h->cache_obj->key); /* Remove the intermediate cache file and return non-APR_SUCCESS */
apr_pool_destroy(dobj->data.pool); return rv;
}
/* don't write empty buckets to the cache */ if (!length) { continue;
}
if (!dobj->disk_info.header_only) {
/* Attempt to create the data file at the last possible moment, if * the body is empty, we don't write a file at all, and save an inode.
*/ if (!dobj->data.tempfd) {
apr_finfo_t finfo;
rv = apr_file_mktemp(&dobj->data.tempfd, dobj->data.tempfile,
APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED
| APR_EXCL, dobj->data.pool); if (rv != APR_SUCCESS) {
apr_pool_destroy(dobj->data.pool); return rv;
}
dobj->file_size = 0;
rv = apr_file_info_get(&finfo, APR_FINFO_IDENT,
dobj->data.tempfd); if (rv != APR_SUCCESS) {
apr_pool_destroy(dobj->data.pool); return rv;
}
dobj->disk_info.device = finfo.device;
dobj->disk_info.inode = finfo.inode;
dobj->disk_info.has_body = 1;
}
/* write to the cache, leave if we fail */
rv = apr_file_write_full(dobj->data.tempfd, str, length, &written); if (rv != APR_SUCCESS) {
ap_log_rerror(
APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00731) "Error when writing cache file for URL %s", h->cache_obj->key); /* Remove the intermediate cache file and return non-APR_SUCCESS */
apr_pool_destroy(dobj->data.pool); return rv;
}
dobj->file_size += written; if (dobj->file_size > dconf->maxfs) {
ap_log_rerror(
APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00732) "URL %s failed the size check " "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->maxfs); /* Remove the intermediate cache file and return non-APR_SUCCESS */
apr_pool_destroy(dobj->data.pool); return APR_EGENERAL;
}
}
/* have we reached the limit of how much we're prepared to write in one * go? If so, leave, we'll get called again. This prevents us from trying * to swallow too much data at once, or taking so long to write the data * the client times out.
*/
dobj->offset -= length; if (dobj->offset <= 0) {
dobj->offset = 0; break;
} if ((dconf->readtime && apr_time_now() > dobj->timeout)) {
dobj->timeout = 0; break;
}
}
/* Was this the final bucket? If yes, close the temp file and perform * sanity checks.
*/ if (seen_eos) { if (!dobj->disk_info.header_only) { constchar *cl_header;
apr_off_t cl;
if (dobj->data.tempfd) {
rv = apr_file_close(dobj->data.tempfd); if (rv != APR_SUCCESS) { /* Buffered write failed, abandon attempt to write */
apr_pool_destroy(dobj->data.pool); return rv;
}
}
if (r->connection->aborted || r->no_cache) {
ap_log_rerror(
APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00733) "Discarding body for URL %s " "because connection has been aborted.", h->cache_obj->key); /* Remove the intermediate cache file and return non-APR_SUCCESS */
apr_pool_destroy(dobj->data.pool); return APR_EGENERAL;
}
if (dobj->file_size < dconf->minfs) {
ap_log_rerror(
APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00734) "URL %s failed the size check " "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->minfs); /* Remove the intermediate cache file and return non-APR_SUCCESS */
apr_pool_destroy(dobj->data.pool); return APR_EGENERAL;
}
/* write the headers to disk at the last possible moment */
rv = write_headers(h, r);
/* move header and data tempfiles to the final destination */ if (APR_SUCCESS == rv) {
rv = file_cache_el_final(conf, &dobj->hdrs, r);
} if (APR_SUCCESS == rv) {
rv = file_cache_el_final(conf, &dobj->vary, r);
} if (APR_SUCCESS == rv) { if (!dobj->disk_info.header_only) {
rv = file_cache_el_final(conf, &dobj->data, r);
} elseif (dobj->data.file) {
rv = apr_file_remove(dobj->data.file, dobj->data.pool);
}
}
/* remove the cached items completely on any failure */ if (APR_SUCCESS != rv) {
remove_url(h, r);
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00736) "commit_entity: URL '%s' not cached due to earlier disk error.",
dobj->name);
} else {
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00737) "commit_entity: Headers and body for URL %s cached.",
dobj->name);
}
/*
 * Consider eliminating the next two directives in favor of
 * Ian's prime number hash...
 * key = hash_fn( r->uri)
 * filename = "/key % prime1 /key %prime2/key %prime3"
 */

/* Handler for the CacheDirLevels directive: the number of levels of
 * subdirectories used under the cache root.  Returns NULL on success
 * or an error string for the config parser.
 */
static const char
*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &cache_disk_module);
    int val = atoi(arg);
    if (val < 1)
        return "CacheDirLevels value must be an integer greater than 0";
    /* levels * length must fit within the fixed-size cache file name */
    if (val * conf->dirlength > CACHEFILE_LEN)
        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
    conf->dirlevels = val;
    return NULL;
}

/* Handler for the CacheDirLength directive: the number of characters in
 * each subdirectory name.  Returns NULL on success or an error string.
 */
static const char
*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
{
    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                 &cache_disk_module);
    int val = atoi(arg);
    if (val < 1)
        return "CacheDirLength value must be an integer greater than 0";
    /* levels * length must fit within the fixed-size cache file name */
    if (val * conf->dirlevels > CACHEFILE_LEN)
        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
    conf->dirlength = val;
    return NULL;
}
if (apr_strtoff(&dconf->minfs, arg, NULL, 10) != APR_SUCCESS ||
dconf->minfs < 0)
{ return"CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
}
dconf->minfs_set = 1; return NULL;
}
if (apr_strtoff(&dconf->maxfs, arg, NULL, 10) != APR_SUCCESS ||
dconf->maxfs < 0)
{ return"CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
}
dconf->maxfs_set = 1; return NULL;
}
if (apr_strtoff(&dconf->readsize, arg, NULL, 10) != APR_SUCCESS ||
dconf->readsize < 0)
{ return"CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
}
dconf->readsize_set = 1; return NULL;
}
if (apr_strtoff(&milliseconds, arg, NULL, 10) != APR_SUCCESS ||
milliseconds < 0)
{ return"CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
}
dconf->readtime = apr_time_from_msec(milliseconds);
dconf->readtime_set = 1; return NULL;
}
staticconst command_rec disk_cache_cmds[] =
{
AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF, "The directory to store cache files"),
AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF, "The number of levels of subdirectories in the cache"),
AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF, "The number of characters in subdirectory names"),
AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF | ACCESS_CONF, "The minimum file size to cache a document"),
AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF | ACCESS_CONF, "The maximum file size to cache a document"),
AP_INIT_TAKE1("CacheReadSize", set_cache_readsize, NULL, RSRC_CONF | ACCESS_CONF, "The maximum quantity of data to attempt to read and cache in one go"),
AP_INIT_TAKE1("CacheReadTime", set_cache_readtime, NULL, RSRC_CONF | ACCESS_CONF, "The maximum time taken to attempt to read and cache in go"),
{NULL}
};
/* The information on this web page was compiled carefully and to the best
 * of our knowledge.  However, no guarantee is given for the completeness,
 * correctness, or quality of the information provided.
 * Note: the colored syntax highlighting is still experimental.
 */