diff options
author | Dries Buytaert <dries@buytaert.net> | 2009-04-22 09:45:03 +0000 |
---|---|---|
committer | Dries Buytaert <dries@buytaert.net> | 2009-04-22 09:45:03 +0000 |
commit | 526401c4c8f7e1a8126c4810a7d7023c4f8bafc5 (patch) | |
tree | 27de106e9904b714e62a02c2f579a28743a4fded /includes | |
parent | 2bc19555bfca04551333e361509c2f51841e16c2 (diff) | |
download | brdo-526401c4c8f7e1a8126c4810a7d7023c4f8bafc5.tar.gz brdo-526401c4c8f7e1a8126c4810a7d7023c4f8bafc5.tar.bz2 |
- Patch #147310 by c960657 et al: better cache headers for reverse proxies.
Diffstat (limited to 'includes')
-rw-r--r-- | includes/bootstrap.inc | 278 | ||||
-rw-r--r-- | includes/cache.inc | 53 | ||||
-rw-r--r-- | includes/common.inc | 63 | ||||
-rw-r--r-- | includes/file.inc | 9 | ||||
-rw-r--r-- | includes/theme.maintenance.inc | 4 |
5 files changed, 290 insertions, 117 deletions
diff --git a/includes/bootstrap.inc b/includes/bootstrap.inc index 19231cb7e..94cd83447 100644 --- a/includes/bootstrap.inc +++ b/includes/bootstrap.inc @@ -744,33 +744,211 @@ function drupal_load($type, $name) { } /** + * Set an HTTP response header for the current page. + * + * Note: When sending a Content-Type header, always include a 'charset' type, + * too. This is necessary to avoid security bugs (e.g. UTF-7 XSS). + * + * @param $name + * The HTTP header name, or a status code followed by a reason phrase, e.g. + * "404 Not Found". + * @param $value + * The HTTP header value; if omitted, the specified header is unset. + * @param $append + * Whether to append the value to an existing header or to replace it. + */ +function drupal_set_header($name = NULL, $value = NULL, $append = FALSE) { + // The headers as name/value pairs. + $headers = &drupal_static(__FUNCTION__, array()); + + if (!isset($name)) { + return $headers; + } + + // Save status codes using the special key ":status". + if (preg_match('/^\d{3} /', $name)) { + $value = $name; + $name = ':status'; + } + else { + _drupal_set_preferred_header_name($name); + $name = strtolower($name); + } + + if (!isset($value)) { + $headers[$name] = FALSE; + } + elseif (isset($headers[$name]) && $append) { + // Multiple headers with identical names may be combined using comma (RFC + // 2616, section 4.2). + $headers[$name] .= ',' . $value; + } + else { + $headers[$name] = $value; + } + drupal_send_headers(array($name => $headers[$name]), TRUE); +} + +/** + * Get the HTTP response headers for the current page. + * + * @param $name + * An HTTP header name. If omitted, all headers are returned as name/value + * pairs. If an array value is FALSE, the header has been unset. + * @return + * A string containing the header value, or FALSE if the header has been unset, + * or NULL if the header has not been set. 
+ */ +function drupal_get_header($name = NULL) { + $headers = drupal_set_header(); + if (isset($name)) { + $name = strtolower($name); + return isset($headers[$name]) ? $headers[$name] : NULL; + } + else { + return $headers; + } +} + +/** + * Header names are case-insensitive, but for maximum compatibility they should + * follow "common form" (see RFC 2616, section 4.2). + */ +function _drupal_set_preferred_header_name($name = NULL) { + static $header_names = array(); + + if (!isset($name)) { + return $header_names; + } + $header_names[strtolower($name)] = $name; +} + +/** + * Send the HTTP response headers previously set using drupal_set_header(). + * Add default headers, unless they have been replaced or unset using + * drupal_set_header(). + * + * @param $default_headers + * An array of headers as name/value pairs. + * @param $only_default + * If TRUE and headers have already been sent, send only the specified headers. + */ +function drupal_send_headers($default_headers = array(), $only_default = FALSE) { + $headers_sent = &drupal_static(__FUNCTION__, FALSE); + $headers = drupal_get_header(); + if ($only_default && $headers_sent) { + $headers = array(); + } + $headers_sent = TRUE; + + $header_names = _drupal_set_preferred_header_name(); + foreach ($default_headers as $name => $value) { + $name_lower = strtolower($name); + if (!isset($headers[$name_lower])) { + $headers[$name_lower] = $value; + $header_names[$name_lower] = $name; + } + } + foreach ($headers as $name_lower => $value) { + if ($name_lower == ':status') { + header($_SERVER['SERVER_PROTOCOL'] . ' ' . $value); + } + // Skip headers that have been unset. + elseif ($value) { + header($header_names[$name_lower] . ': ' . $value); + } + } +} + +/** * Set HTTP headers in preparation for a page response. * - * Authenticated users are always given a 'no-cache' header, and will - * fetch a fresh page on every request. This prevents authenticated - * users seeing locally cached pages that show them as logged out. 
+ * Authenticated users are always given a 'no-cache' header, and will fetch a + * fresh page on every request. This prevents authenticated users from seeing + * locally cached pages. + * + * Also give each page a unique ETag. This will force clients to include both + * an If-Modified-Since header and an If-None-Match header when doing + * conditional requests for the page (required by RFC 2616, section 13.3.4), + * making the validation more robust. This is a workaround for a bug in Mozilla + * Firefox that is triggered when Drupal's caching is enabled and the user + * accesses Drupal via an HTTP proxy (see + * https://bugzilla.mozilla.org/show_bug.cgi?id=269303): When an authenticated + * user requests a page, and then logs out and requests the same page again, + * Firefox may send a conditional request based on the page that was cached + * locally when the user was logged in. If this page did not have an ETag + * header, the request only contains an If-Modified-Since header. The date will + * be recent, because with authenticated users the Last-Modified header always + * refers to the time of the request. If the user accesses Drupal via a proxy + * server, and the proxy already has a cached copy of the anonymous page with an + * older Last-Modified date, the proxy may respond with 304 Not Modified, making + * the client think that the anonymous and authenticated pageviews are + * identical. * * @see page_set_cache() */ function drupal_page_header() { - header("Expires: Sun, 19 Nov 1978 05:00:00 GMT"); - header("Last-Modified: " . gmdate("D, d M Y H:i:s") . 
" GMT"); - header("Cache-Control: store, no-cache, must-revalidate"); - header("Cache-Control: post-check=0, pre-check=0", FALSE); + $headers_sent = &drupal_static(__FUNCTION__, FALSE); + if ($headers_sent) { + return TRUE; + } + $headers_sent = TRUE; + + $default_headers = array( + 'Expires' => 'Sun, 19 Nov 1978 05:00:00 GMT', + 'Last-Modified' => gmdate(DATE_RFC1123, REQUEST_TIME), + 'Cache-Control' => 'no-cache, must-revalidate, post-check=0, pre-check=0', + 'ETag' => '"' . REQUEST_TIME . '"', + ); + drupal_send_headers($default_headers); } /** * Set HTTP headers in preparation for a cached page response. * - * The general approach here is that anonymous users can keep a local - * cache of the page, but must revalidate it on every request. Then, - * they are given a '304 Not Modified' response as long as they stay - * logged out and the page has not been modified. - * - */ -function drupal_page_cache_header($cache) { - // Create entity tag based on cache update time. - $etag = '"' . md5($cache->created) . '"'; + * The headers allow as much as possible in proxies and browsers without any + * particular knowledge about the pages. Modules can override these headers + * using drupal_set_header(). + * + * If the request is conditional (using If-Modified-Since and If-None-Match), + * and the conditions match those currently in the cache, a 304 Not Modified + * response is sent. + */ +function drupal_page_cache_header(stdClass $cache) { + // Negotiate whether to use compression. + $page_compression = variable_get('page_compression', TRUE) && extension_loaded('zlib'); + $return_compressed = $page_compression && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== FALSE; + + // Get headers set in hook_boot(). Keys are lower-case. + $hook_boot_headers = drupal_get_header(); + + // Headers generated in this function, that may be replaced or unset using + // drupal_set_headers(). Keys are mixed-case. 
+ $default_headers = array(); + + foreach ($cache->headers as $name => $value) { + // In the case of a 304 response, certain headers must be sent, and the + // remaining may not (see RFC 2616, section 10.3.5). Do not override + // headers set in hook_boot(). + $name_lower = strtolower($name); + if (in_array($name_lower, array('content-location', 'expires', 'cache-control', 'vary')) && !isset($hook_boot_headers[$name_lower])) { + drupal_set_header($name, $value); + unset($cache->headers[$name]); + } + } + + // If a cached page is served from an HTTP proxy without hitting the web + // server, the boot and exit hooks cannot be fired, so only allow caching in + // proxies when aggressive caching is enabled. If the client sends a session + // cookie, do not bother caching the page in a public proxy, because the + // cached copy will only be served to that particular user due to Vary: + // Cookie, unless the Vary header has been replaced or unset in hook_boot() + // (see below). + $max_age = variable_get('cache') == CACHE_AGGRESSIVE && (!isset($_COOKIE[session_name()]) || isset($hook_boot_headers['vary'])) ? variable_get('cache_lifetime', 0) : 0; + $default_headers['Cache-Control'] = 'public, max-age=' . $max_age; + + // Entity tag should change if the output changes. + $etag = '"' . $cache->created . '-' . intval($return_compressed) . '"'; + header('Etag: ' . $etag); // See if the client has provided the required HTTP headers. $if_modified_since = isset($_SERVER['HTTP_IF_MODIFIED_SINCE']) ? strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']) : FALSE; @@ -780,36 +958,46 @@ function drupal_page_cache_header($cache) { && $if_none_match == $etag // etag must match && $if_modified_since == $cache->created) { // if-modified-since must match header($_SERVER['SERVER_PROTOCOL'] . 
' 304 Not Modified'); - // All 304 responses must send an etag if the 200 response for the same object contained an etag - header("Etag: $etag"); + drupal_send_headers($default_headers); return; } - // Send appropriate response: - header("Last-Modified: " . gmdate(DATE_RFC1123, $cache->created)); - header("ETag: $etag"); + // Send the remaining headers. + foreach ($cache->headers as $name => $value) { + drupal_set_header($name, $value); + } - // The following headers force validation of cache: - header("Expires: Sun, 19 Nov 1978 05:00:00 GMT"); - header("Cache-Control: must-revalidate"); + $default_headers['Last-Modified'] = gmdate(DATE_RFC1123, $cache->created); - if (variable_get('page_compression', TRUE)) { - // Determine if the browser accepts gzipped data. - if (@strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') === FALSE && function_exists('gzencode')) { - // Strip the gzip header and run uncompress. - $cache->data = gzinflate(substr(substr($cache->data, 10), 0, -8)); - } - elseif (function_exists('gzencode')) { - header('Content-Encoding: gzip'); - } + // HTTP/1.0 proxies do not support the Vary header, so prevent any caching + // by sending an Expires date in the past. HTTP/1.1 clients ignore the + // Expires header if a Cache-Control: max-age= directive is specified (see RFC + // 2616, section 14.9.3). + $default_headers['Expires'] = 'Sun, 19 Nov 1978 05:00:00 GMT'; + + drupal_send_headers($default_headers); + + // Allow HTTP proxies to cache pages for anonymous users without a session + // cookie. The Vary header is used to indicate the set of request-header + // fields that fully determine whether a cache is permitted to use the + // response to reply to a subsequent request for a given URL without + // revalidation. If a Vary header has been set in hook_boot(), it is assumed + // that the module knows how to cache the page. 
+ if (!isset($hook_boot_headers['vary']) && !variable_get('omit_vary_cookie')) { + header('Vary: Cookie'); } - // Send the original request's headers. We send them one after - // another so PHP's header() function can deal with duplicate - // headers. - $headers = explode("\n", $cache->headers); - foreach ($headers as $header) { - header($header); + if ($page_compression) { + header('Vary: Accept-Encoding', FALSE); + // If page_compression is enabled, the cache contains gzipped data. + if ($return_compressed) { + header('Content-Encoding: gzip'); + } + else { + // The client does not support compression, so unzip the data in the + // cache. Strip the gzip header and run uncompress. + $cache->data = gzinflate(substr(substr($cache->data, 10), 0, -8)); + } } print $cache->data; @@ -1206,6 +1394,7 @@ function _drupal_bootstrap($phase) { if (drupal_session_is_started() && empty($_SESSION)) { session_destroy(); } + header('X-Drupal-Cache: HIT'); drupal_page_cache_header($cache); // If the skipping of the bootstrap hooks is not enforced, call hook_exit. if ($cache_mode != CACHE_AGGRESSIVE) { @@ -1214,15 +1403,20 @@ function _drupal_bootstrap($phase) { // We are done. exit; } + // Prepare for non-cached page workflow. - drupal_page_header(); + // If the session has not already been started and output buffering is - // not enabled, the session must be started now before the HTTP headers - // are sent. If output buffering is enabled, the session may be started + // not enabled, the HTTP headers must be sent now, including the session + // cookie. If output buffering is enabled, the session may be started // at any time using drupal_session_start(). 
if ($cache === FALSE) { + drupal_page_header(); drupal_session_start(); } + else { + header('X-Drupal-Cache: MISS'); + } break; case DRUPAL_BOOTSTRAP_LANGUAGE: diff --git a/includes/cache.inc b/includes/cache.inc index 6d9c94e61..5b7a61170 100644 --- a/includes/cache.inc +++ b/includes/cache.inc @@ -30,33 +30,30 @@ function cache_get($cid, $table = 'cache') { } $cache = db_query("SELECT data, created, headers, expire, serialized FROM {" . $table . "} WHERE cid = :cid", array(':cid' => $cid))->fetchObject(); - if (isset($cache->data)) { - // If the data is permanent or we're not enforcing a minimum cache lifetime - // always return the cached data. - if ($cache->expire == CACHE_PERMANENT || !variable_get('cache_lifetime', 0)) { - if ($cache->serialized) { - $cache->data = unserialize($cache->data); - } - } - // If enforcing a minimum cache lifetime, validate that the data is - // currently valid for this user before we return it by making sure the - // cache entry was created before the timestamp in the current session's - // cache timer. The cache variable is loaded into the $user object by - // _sess_read() in session.inc. - else { - if ($user->cache > $cache->created) { - // This cache data is too old and thus not valid for us, ignore it. - return FALSE; - } - else { - if ($cache->serialized) { - $cache->data = unserialize($cache->data); - } - } - } - return $cache; + + if (!isset($cache->data)) { + return FALSE; + } + + // If enforcing a minimum cache lifetime, validate that the data is + // currently valid for this user before we return it by making sure the cache + // entry was created before the timestamp in the current session's cache + // timer. The cache variable is loaded into the $user object by _sess_read() + // in session.inc. If the data is permanent or we're not enforcing a minimum + // cache lifetime always return the cached data. 
+ if ($cache->expire != CACHE_PERMANENT && variable_get('cache_lifetime', 0) && $user->cache > $cache->created) { + // This cache data is too old and thus not valid for us, ignore it. + return FALSE; } - return FALSE; + + if ($cache->serialized) { + $cache->data = unserialize($cache->data); + } + if (isset($cache->headers)) { + $cache->headers = unserialize($cache->headers); + } + + return $cache; } /** @@ -104,12 +101,12 @@ function cache_get($cid, $table = 'cache') { * @param $headers * A string containing HTTP header information for cached pages. */ -function cache_set($cid, $data, $table = 'cache', $expire = CACHE_PERMANENT, $headers = NULL) { +function cache_set($cid, $data, $table = 'cache', $expire = CACHE_PERMANENT, array $headers = NULL) { $fields = array( 'serialized' => 0, 'created' => REQUEST_TIME, 'expire' => $expire, - 'headers' => $headers, + 'headers' => isset($headers) ? serialize($headers) : NULL, ); if (!is_string($data)) { $fields['data'] = serialize($data); diff --git a/includes/common.inc b/includes/common.inc index 97efc72c3..55b7a278b 100644 --- a/includes/common.inc +++ b/includes/common.inc @@ -155,32 +155,6 @@ function drupal_clear_path_cache() { } /** - * Set an HTTP response header for the current page. - * - * Note: When sending a Content-Type header, always include a 'charset' type, - * too. This is necessary to avoid security bugs (e.g. UTF-7 XSS). - */ -function drupal_set_header($header = NULL) { - // We use an array to guarantee there are no leading or trailing delimiters. - // Otherwise, header('') could get called when serving the page later, which - // ends HTTP headers prematurely on some PHP versions. - static $stored_headers = array(); - - if (strlen($header)) { - header($header); - $stored_headers[] = $header; - } - return implode("\n", $stored_headers); -} - -/** - * Get the HTTP response headers for the current page. 
- */ -function drupal_get_headers() { - return drupal_set_header(); -} - -/** * Add a feed URL for the current page. * * This function can be called as long the HTML header hasn't been sent. @@ -357,7 +331,7 @@ function drupal_goto($path = '', $query = NULL, $fragment = NULL, $http_response */ function drupal_site_offline() { drupal_maintenance_theme(); - drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 503 Service unavailable'); + drupal_set_header('503 Service unavailable'); drupal_set_title(t('Site offline')); print theme('maintenance_page', filter_xss_admin(variable_get('site_offline_message', t('@site is currently under maintenance. We should be back shortly. Thank you for your patience.', array('@site' => variable_get('site_name', 'Drupal')))))); @@ -367,7 +341,7 @@ function drupal_site_offline() { * Generates a 404 error if the request can not be handled. */ function drupal_not_found() { - drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 404 Not Found'); + drupal_set_header('404 Not Found'); watchdog('page not found', check_plain($_GET['q']), NULL, WATCHDOG_WARNING); @@ -401,7 +375,7 @@ function drupal_not_found() { * Generates a 403 error if the request is not allowed. */ function drupal_access_denied() { - drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 403 Forbidden'); + drupal_set_header('403 Forbidden'); watchdog('access denied', check_plain($_GET['q']), NULL, WATCHDOG_WARNING); // Keep old path for reference. @@ -818,7 +792,7 @@ function _drupal_log_error($error, $fatal = FALSE) { } if ($fatal) { - drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' Service unavailable'); + drupal_set_header('503 Service unavailable'); drupal_set_title(t('Error')); if (!defined('MAINTENANCE_MODE') && drupal_get_bootstrap_phase() == DRUPAL_BOOTSTRAP_FULL) { // To conserve CPU and bandwidth, omit the blocks. @@ -2847,7 +2821,7 @@ function drupal_to_js($var) { */ function drupal_json($var = NULL) { // We are returning JavaScript, so tell the browser. 
- drupal_set_header('Content-Type: text/javascript; charset=utf-8'); + drupal_set_header('Content-Type', 'text/javascript; charset=utf-8'); if (isset($var)) { echo drupal_to_js($var); @@ -3015,7 +2989,7 @@ function _drupal_bootstrap_full() { set_exception_handler('_drupal_exception_handler'); // Emit the correct charset HTTP header. - drupal_set_header('Content-Type: text/html; charset=utf-8'); + drupal_set_header('Content-Type', 'text/html; charset=utf-8'); // Detect string handling method unicode_check(); // Undo magic quotes @@ -3047,24 +3021,35 @@ function page_set_cache() { global $user, $base_root; if (page_get_cache(FALSE)) { - $cache = TRUE; - $data = ob_get_contents(); + $cache_page = TRUE; + $cache = (object) array( + 'cid' => $base_root . request_uri(), + 'data' => ob_get_clean(), + 'expire' => CACHE_TEMPORARY, + 'created' => REQUEST_TIME, + 'headers' => drupal_get_header(), + ); if (variable_get('page_compression', TRUE) && function_exists('gzencode')) { // We do not store the data in case the zlib mode is deflate. This should // be rarely happening. if (zlib_get_coding_type() == 'deflate') { - $cache = FALSE; + $cache_page = FALSE; } elseif (zlib_get_coding_type() == FALSE) { - $data = gzencode($data, 9, FORCE_GZIP); + $cache->data = gzencode($cache->data, 9, FORCE_GZIP); } // The remaining case is 'gzip' which means the data is already // compressed and nothing left to do but to store it. } - ob_end_flush(); - if ($cache && $data) { - cache_set($base_root . request_uri(), $data, 'cache_page', CACHE_TEMPORARY, drupal_get_headers()); + if ($cache_page && $cache->data) { + cache_set($cache->cid, $cache->data, 'cache_page', $cache->expire, $cache->headers); } + drupal_page_cache_header($cache); + } + else { + // If output buffering was enabled during bootstrap, and the headers were + // not sent in the DRUPAL_BOOTSTRAP_LATE_PAGE_CACHE phase, send them now. 
+ drupal_page_header(); } } diff --git a/includes/file.inc b/includes/file.inc index e61574501..58b3e1b20 100644 --- a/includes/file.inc +++ b/includes/file.inc @@ -1311,13 +1311,10 @@ function file_transfer($source, $headers) { ob_end_clean(); } - foreach ($headers as $header) { - // To prevent HTTP header injection, we delete new lines that are - // not followed by a space or a tab. - // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 - $header = preg_replace('/\r?\n(?!\t| )/', '', $header); - drupal_set_header($header); + foreach ($headers as $name => $value) { + drupal_set_header($name, $value); } + drupal_send_headers(); $source = file_create_path($source); diff --git a/includes/theme.maintenance.inc b/includes/theme.maintenance.inc index f2278d670..7eb3ef6aa 100644 --- a/includes/theme.maintenance.inc +++ b/includes/theme.maintenance.inc @@ -108,7 +108,7 @@ function theme_task_list($items, $active = NULL) { * The page content to show. */ function theme_install_page($content) { - drupal_set_header('Content-Type: text/html; charset=utf-8'); + drupal_set_header('Content-Type', 'text/html; charset=utf-8'); // Assign content. $variables['content'] = $content; @@ -162,7 +162,7 @@ function theme_install_page($content) { */ function theme_update_page($content, $show_messages = TRUE) { // Set required headers. - drupal_set_header('Content-Type: text/html; charset=utf-8'); + drupal_set_header('Content-Type', 'text/html; charset=utf-8'); // Assign content and show message flag. $variables['content'] = $content; |