/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
*/
/* mod_speling.c - by Alexei Kosut <akosut@organic.com> June, 1996 * * This module is transparent, and simple. It attempts to correct * misspellings of URLs that users might have entered, namely by checking * capitalizations. If it finds a match, it sends a redirect. * * Sep-1999 Hugo Haas <hugo@w3.org> * o Added a CheckCaseOnly option to check only miscapitalized words. * * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De> * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in * speling_module). * o Integrated tcsh's "spelling correction" routine which allows one * misspelling (character insertion/omission/typo/transposition). * Rewrote it to ignore case as well. This ought to catch the majority * of misspelled requests. * o Commented out the second pass where files' suffixes are stripped. * Given the better hit rate of the first pass, this rather ugly * (request index.html, receive index.db ?!?!) solution can be * omitted. * o wrote a "kind of" html page for mod_speling * * Activate it with "CheckSpelling On"
*/
/*
 * Forward declaration of this module's record.  Its address is passed to
 * ap_get_module_config() further down to fetch the per-directory spconfig.
 * NOTE(review): the defining declaration is not visible in this excerpt;
 * presumably it appears at the end of the file -- confirm.
 */
module AP_MODULE_DECLARE_DATA speling_module;
/*
 * Configuration record for this module.  Each member is an int flag that
 * the directive table in this file binds to one configuration directive.
 */
typedef struct {
    int enabled;              /* set by the "CheckSpelling" directive */
    int check_case_only;      /* set by the "CheckCaseOnly" directive */
    int check_basename_match; /* set by the "CheckBasenameMatch" directive */
} spconfig;
/* * Create a configuration specific to this module for a server or directory * location, and fill it with the default settings. * * The API says that in the absence of a merge function, the record for the * closest ancestor is used exclusively. That's what we want, so we don't * bother to have such a function.
*/
/*
 * Respond to a callback to create configuration record for a server or
 * vhost environment.
 *
 * p - pool forwarded unchanged to mkconfig()
 * s - server record; not consulted here
 *
 * Returns whatever mkconfig(p) returns.
 */
static void *create_mconfig_for_server(apr_pool_t *p, server_rec *s)
{
    return mkconfig(p);
}
/*
 * Respond to a callback to create a config record for a specific directory.
 *
 * p   - pool forwarded unchanged to mkconfig()
 * dir - directory name; not consulted here
 *
 * Returns whatever mkconfig(p) returns.
 */
static void *create_mconfig_for_directory(apr_pool_t *p, char *dir)
{
    return mkconfig(p);
}
/* * Define the directives specific to this module. This structure is referenced * later by the 'module' structure.
*/ staticconst command_rec speling_cmds[] =
{
AP_INIT_FLAG("CheckSpelling", ap_set_flag_slot,
(void*)APR_OFFSETOF(spconfig, enabled), OR_OPTIONS, "whether or not to fix miscapitalized/misspelled requests"),
AP_INIT_FLAG("CheckCaseOnly", ap_set_flag_slot,
(void*)APR_OFFSETOF(spconfig, check_case_only), OR_OPTIONS, "whether or not to fix only miscapitalized requests"),
AP_INIT_FLAG("CheckBasenameMatch", ap_set_flag_slot,
(void*)APR_OFFSETOF(spconfig, check_basename_match), OR_OPTIONS, "whether or not to fix files with the same base name"),
{ NULL }
};
/*
 * spdist() is taken from Kernighan & Pike,
 *  _The_UNIX_Programming_Environment_
 * and adapted somewhat to correspond better to psychological reality.
 * (Note the changes to the return values)
 *
 * According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
 * page 363, the correct order for this is:
 * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
 * thus, it was exactly backwards in the old version. -- PWP
 *
 * This routine was taken out of tcsh's spelling correction code
 * (tcsh-6.07.04) and re-converted to apache data types ("char" type
 * instead of tcsh's NLS'ed "Char"). Plus it now ignores the case
 * during comparisons, so it is an "approximate strcasecmp()".
 * NOTE that it still allows only _one_ real "typo";
 * it does NOT try to correct multiple errors.
 */
static sp_reason spdist(constchar *s, constchar *t)
{ for (; apr_tolower(*s) == apr_tolower(*t); t++, s++) { if (*t == '\0') { return SP_MISCAPITALIZED; /* exact match (sans case) */
}
} if (*s) { if (*t) { if (s[1] && t[1] && apr_tolower(*s) == apr_tolower(t[1])
&& apr_tolower(*t) == apr_tolower(s[1])
&& strcasecmp(s + 2, t + 2) == 0) { return SP_TRANSPOSITION; /* transposition */
} if (strcasecmp(s + 1, t + 1) == 0) { return SP_SIMPLETYPO; /* 1 char mismatch */
}
} if (strcasecmp(s + 1, t) == 0) { return SP_EXTRACHAR; /* extra character */
}
} if (*t && strcasecmp(s, t + 1) == 0) { return SP_MISSINGCHAR; /* missing character */
} return SP_VERYDIFFERENT; /* distance too large to fix. */
}
cfg = ap_get_module_config(r->per_dir_config, &speling_module); if (!cfg->enabled) { return DECLINED;
}
/* We only want to worry about GETs */ if (r->method_number != M_GET) { return DECLINED;
}
/* We've already got a file of some kind or another */ if (r->finfo.filetype != APR_NOFILE) { return DECLINED;
}
/* Not a file request */ if (r->proxyreq || !r->filename) { return DECLINED;
}
/* This is a sub request - don't mess with it */ if (r->main) { return DECLINED;
}
/* * The request should end up looking like this: * r->uri: /correct-url/mispelling/more * r->filename: /correct-file/mispelling r->path_info: /more * * So we do this in steps. First break r->filename into two pieces
*/
filoc = ap_rind(r->filename, '/'); /* * Don't do anything if the request doesn't contain a slash, or * requests "/"
*/ if (filoc == -1 || strcmp(r->uri, "/") == 0) { return DECLINED;
}
/* good = /correct-file */
good = apr_pstrndup(r->pool, r->filename, filoc); /* bad = mispelling */
bad = apr_pstrdup(r->pool, r->filename + filoc + 1); /* postgood = mispelling/more */
postgood = apr_pstrcat(r->pool, bad, r->path_info, NULL);
urlen = strlen(r->uri);
pglen = strlen(postgood);
/* Check to see if the URL pieces add up */ if (strcmp(postgood, r->uri + (urlen - pglen))) { return DECLINED;
}
/* Now open the directory and do ourselves a check... */ if (apr_dir_open(&dir, good, r->pool) != APR_SUCCESS) { /* Oops, not a directory... */ return DECLINED;
}
while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dir) == APR_SUCCESS) {
sp_reason q;
/* * If we end up with a "fixed" URL which is identical to the * requested one, we must have found a broken symlink or some such. * Do _not_ try to redirect this, it causes a loop!
*/ if (strcmp(bad, dirent.name) == 0) {
apr_dir_close(dir); return OK;
}
/* * miscapitalization errors are checked first (like, e.g., lower case * file, upper case request)
*/ elseif (strcasecmp(bad, dirent.name) == 0) {
misspelled_file *sp_new;
/* * The spdist() should have found the majority of the misspelled * requests. It is of questionable use to continue looking for * files with the same base name, but potentially of totally wrong * type (index.html <-> index.db). * * If you're using MultiViews, and have a file named foobar.html, * which you refer to as "foobar", and someone tried to access * "Foobar", without CheckBasenameMatch, mod_speling won't find it, * because it won't find anything matching that spelling. * With the extension-munging, it would locate "foobar.html".
*/ elseif (cfg->check_basename_match == 1) { /* * Okay... we didn't find anything. Now we take out the hard-core * power tools. There are several cases here. Someone might have * entered a wrong extension (.htm instead of .html or vice * versa) or the document could be negotiated. At any rate, now * we just compare stuff before the first dot. If it matches, we * figure we got us a match. This can result in wrong things if * there are files of different content types but the same prefix * (e.g. foo.gif and foo.html) This code will pick the first one * it finds. Better than a Not Found, though.
*/ int entloc = ap_ind(dirent.name, '.'); if (entloc == -1) {
entloc = strlen(dirent.name);
}
if (candidates->nelts != 0) { /* Wow... we found us a mispelling. Construct a fixed url */ char *nuri; constchar *ref;
misspelled_file *variant = (misspelled_file *) candidates->elts; int i;
/* * Conditions for immediate redirection: * a) the first candidate was not found by stripping the suffix * AND b) there exists only one candidate OR the best match is not * ambiguous * then return a redirection right away.
*/ if (variant[0].quality != SP_VERYDIFFERENT
&& (candidates->nelts == 1
|| variant[0].quality != variant[1].quality)) {
ap_log_rerror(APLOG_MARK, APLOG_INFO, APR_SUCCESS,
r,
ref ? APLOGNO(03224) "Fixed spelling: %s to %s from %s"
: APLOGNO(03225) "Fixed spelling: %s to %s%s",
r->uri, nuri,
(ref ? ref : ""));
return HTTP_MOVED_PERMANENTLY;
} /* * Otherwise, a "[300] Multiple Choices" list with the variants is * returned.
*/ else {
apr_pool_t *p;
apr_table_t *notes;
apr_pool_t *sub_pool;
apr_array_header_t *t;
apr_array_header_t *v;
if (r->main == NULL) {
p = r->pool;
notes = r->notes;
} else {
p = r->main->pool;
notes = r->main->notes;
}
if (apr_pool_create(&sub_pool, p) != APR_SUCCESS) return DECLINED;
apr_pool_tag(sub_pool, "speling_sub");
t = apr_array_make(sub_pool, candidates->nelts * 8 + 8, sizeof(char *));
v = apr_array_make(sub_pool, candidates->nelts * 5, sizeof(char *));
/* Generate the response text. */
*(constchar **)apr_array_push(t) = "The document name you requested (";
*(constchar **)apr_array_push(t) = ap_escape_html(sub_pool, r->uri);
*(constchar **)apr_array_push(t) = ") could not be found on this server.\n" "However, we found documents with names similar " "to the one you requested.
"
"Available documents:\n
\n"
;
for (i = 0; i < candidates->nelts; ++i) { char *vuri; constchar *reason;
/* * when we have printed the "close matches" and there are * more "distant matches" (matched by stripping the suffix), * then we insert an additional separator text to suggest * that the user LOOK CLOSELY whether these are really the * files she wanted.
*/ if (i > 0 && i < candidates->nelts - 1
&& variant[i].quality != SP_VERYDIFFERENT
&& variant[i + 1].quality == SP_VERYDIFFERENT) {
*(constchar **)apr_array_push(t) = "\nFurthermore, the following related " "documents were found:\n
\n"
;
}
}
*(constchar **)apr_array_push(t) = "\n";
/* If we know there was a referring page, add a note: */ if (ref != NULL) {
*(constchar **)apr_array_push(t) = "Please consider informing the owner of the " "referring page ";
*(constchar **)apr_array_push(t) = ap_escape_html(sub_pool, ref);
*(constchar **)apr_array_push(t) = " about the broken link.\n";
}
/* Pass our apr_table_t to http_protocol.c (see mod_negotiation): */
apr_table_setn(notes, "variant-list", apr_array_pstrcat(p, t, 0));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.