// Copyright 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "content/child/site_isolation_policy.h" #include "base/basictypes.h" #include "base/command_line.h" #include "base/lazy_instance.h" #include "base/logging.h" #include "base/metrics/histogram.h" #include "base/strings/string_piece.h" #include "base/strings/string_util.h" #include "content/child/child_thread.h" #include "content/public/common/content_switches.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #include "net/http/http_response_headers.h" #include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h" #include "third_party/WebKit/public/platform/WebString.h" #include "third_party/WebKit/public/platform/WebURL.h" #include "third_party/WebKit/public/platform/WebURLRequest.h" #include "third_party/WebKit/public/platform/WebURLResponse.h" #include "third_party/WebKit/public/web/WebDocument.h" #include "third_party/WebKit/public/web/WebFrame.h" #include "third_party/WebKit/public/web/WebFrameClient.h" #include "third_party/WebKit/public/web/WebSecurityOrigin.h" using base::StringPiece; using blink::WebDocument; using blink::WebString; using blink::WebURL; using blink::WebURLResponse; using blink::WebURLRequest; namespace content { namespace { // Maintain the bookkeeping data between OnReceivedResponse and // OnReceivedData. The key is a request id maintained by ResourceDispatcher. static base::LazyInstance<SiteIsolationPolicy::RequestIdToMetaDataMap> g_metadata_map = LAZY_INSTANCE_INITIALIZER; // Maintain the bookkeeping data for OnReceivedData. Blocking decision is made // when OnReceivedData is called for the first time for a request, and the // decision will remain the same for following data. This map maintains the // decision. The key is a request id maintained by ResourceDispatcher. static base::LazyInstance<SiteIsolationPolicy::RequestIdToResultMap> g_result_map = LAZY_INSTANCE_INITIALIZER; // The cross-site document blocking/UMA data collection is deactivated by // default, and only activated in renderer processes. static bool g_policy_enabled = false; // MIME types const char kTextHtml[] = "text/html"; const char kTextXml[] = "text/xml"; const char xAppRssXml[] = "application/rss+xml"; const char kAppXml[] = "application/xml"; const char kAppJson[] = "application/json"; const char kTextJson[] = "text/json"; const char kTextXjson[] = "text/x-json"; const char kTextPlain[] = "text/plain"; // TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted // when this class is used for actual blocking. bool IsRenderableStatusCode(int status_code) { // Chrome only uses the content of a response with one of these status codes // for CSS/JavaScript. For images, Chrome just ignores status code. const int renderable_status_code[] = {200, 201, 202, 203, 206, 300, 301, 302, 303, 305, 306, 307}; for (size_t i = 0; i < arraysize(renderable_status_code); ++i) { if (renderable_status_code[i] == status_code) return true; } return false; } bool MatchesSignature(StringPiece data, const StringPiece signatures[], size_t arr_size) { size_t offset = data.find_first_not_of(" \t\r\n"); // There is no not-whitespace character in this document. if (offset == base::StringPiece::npos) return false; data.remove_prefix(offset); size_t length = data.length(); for (size_t sig_index = 0; sig_index < arr_size; ++sig_index) { const StringPiece& signature = signatures[sig_index]; size_t signature_length = signature.length(); if (length < signature_length) continue; if (LowerCaseEqualsASCII( data.begin(), data.begin() + signature_length, signature.data())) return true; } return false; } void IncrementHistogramCount(const std::string& name) { // The default value of min, max, bucket_count are copied from histogram.h. base::HistogramBase* histogram_pointer = base::Histogram::FactoryGet( name, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag); histogram_pointer->Add(1); } void IncrementHistogramEnum(const std::string& name, uint32 sample, uint32 boundary_value) { // The default value of min, max, bucket_count are copied from histogram.h. base::HistogramBase* histogram_pointer = base::LinearHistogram::FactoryGet( name, 1, boundary_value, boundary_value + 1, base::HistogramBase::kUmaTargetedHistogramFlag); histogram_pointer->Add(sample); } void HistogramCountBlockedResponse( const std::string& bucket_prefix, const SiteIsolationPolicy::ResponseMetaData& resp_data, bool nosniff_block) { std::string block_label(nosniff_block ? ".NoSniffBlocked" : ".Blocked"); IncrementHistogramCount(bucket_prefix + block_label); // The content is blocked if it is sniffed as HTML/JSON/XML. When // the blocked response is with an error status code, it is not // disruptive for the following reasons : 1) the blocked content is // not a binary object (such as an image) since it is sniffed as // text; 2) then, this blocking only breaks the renderer behavior // only if it is either JavaScript or CSS. However, the renderer // doesn't use the contents of JS/CSS with unaffected status code // (e.g, 404). 3) the renderer is expected not to use the cross-site // document content for purposes other than JS/CSS (e.g, XHR). bool renderable_status_code = IsRenderableStatusCode(resp_data.http_status_code); if (renderable_status_code) { IncrementHistogramEnum( bucket_prefix + block_label + ".RenderableStatusCode", resp_data.resource_type, ResourceType::LAST_TYPE); } else { IncrementHistogramCount(bucket_prefix + block_label + ".NonRenderableStatusCode"); } } void HistogramCountNotBlockedResponse(const std::string& bucket_prefix, bool sniffed_as_js) { IncrementHistogramCount(bucket_prefix + ".NotBlocked"); if (sniffed_as_js) IncrementHistogramCount(bucket_prefix + ".NotBlocked.MaybeJS"); } } // namespace SiteIsolationPolicy::ResponseMetaData::ResponseMetaData() {} void SiteIsolationPolicy::SetPolicyEnabled(bool enabled) { g_policy_enabled = enabled; } void SiteIsolationPolicy::OnReceivedResponse( int request_id, const GURL& frame_origin, const GURL& response_url, ResourceType::Type resource_type, int origin_pid, const webkit_glue::ResourceResponseInfo& info) { if (!g_policy_enabled) return; // if |origin_pid| is non-zero, it means that this response is for a plugin // spawned from this renderer process. We exclude responses for plugins for // now, but eventually, we're going to make plugin processes directly talk to // the browser process so that we don't apply cross-site document blocking to // them. if (origin_pid) return; UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1); // See if this is for navigation. If it is, don't block it, under the // assumption that we will put it in an appropriate process. if (ResourceType::IsFrame(resource_type)) return; if (!IsBlockableScheme(response_url)) return; if (IsSameSite(frame_origin, response_url)) return; SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType canonical_mime_type = GetCanonicalMimeType(info.mime_type); if (canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::Others) return; // Every CORS request should have the Access-Control-Allow-Origin header even // if it is preceded by a pre-flight request. Therefore, if this is a CORS // request, it has this header. response.httpHeaderField() internally uses // case-insensitive matching for the header name. std::string access_control_origin; // We can use a case-insensitive header name for EnumerateHeader(). info.headers->EnumerateHeader( NULL, "access-control-allow-origin", &access_control_origin); if (IsValidCorsHeaderSet(frame_origin, response_url, access_control_origin)) return; // Real XSD data collection starts from here. std::string no_sniff; info.headers->EnumerateHeader(NULL, "x-content-type-options", &no_sniff); ResponseMetaData resp_data; resp_data.frame_origin = frame_origin.spec(); resp_data.response_url = response_url; resp_data.resource_type = resource_type; resp_data.canonical_mime_type = canonical_mime_type; resp_data.http_status_code = info.headers->response_code(); resp_data.no_sniff = LowerCaseEqualsASCII(no_sniff, "nosniff"); (g_metadata_map.Get())[request_id] = resp_data; } bool SiteIsolationPolicy::ShouldBlockResponse( int request_id, const char* raw_data, int raw_length, std::string* alternative_data) { if (!g_policy_enabled) return false; RequestIdToMetaDataMap& metadata_map = g_metadata_map.Get(); RequestIdToResultMap& result_map = g_result_map.Get(); // If there's an entry for |request_id| in blocked_map, this request's first // data packet has already been examined. We can return the result here. if (result_map.count(request_id) != 0) { if (result_map[request_id]) { // Here, the blocking result has been set for the previous run of // ShouldBlockResponse(), so we set alternative data to an empty string so // that ResourceDispatcher doesn't call its peer's onReceivedData() with // the alternative data. alternative_data->erase(); return true; } return false; } // If result_map doesn't have an entry for |request_id|, we're receiving the // first data packet for request_id. If request_id is not registered, this // request is identified as a non-target of our policy. So we return true. if (metadata_map.count(request_id) == 0) { // We set request_id to true so that we always return true for this request. result_map[request_id] = false; return false; } StringPiece data(raw_data, raw_length); // We now look at the first data packet received for request_id. ResponseMetaData resp_data = metadata_map[request_id]; metadata_map.erase(request_id); // Record the length of the first received network packet to see if it's // enough for sniffing. UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length); // Record the number of cross-site document responses with a specific mime // type (text/html, text/xml, etc). UMA_HISTOGRAM_ENUMERATION( "SiteIsolation.XSD.MimeType", resp_data.canonical_mime_type, SiteIsolationPolicy::ResponseMetaData::MaxCanonicalMimeType); // Store the result of cross-site document blocking analysis. bool is_blocked = false; bool sniffed_as_js = SniffForJS(data); // Record the number of responses whose content is sniffed for what its mime // type claims it to be. For example, we apply a HTML sniffer for a document // tagged with text/html here. Whenever this check becomes true, we'll block // the response. if (resp_data.canonical_mime_type != SiteIsolationPolicy::ResponseMetaData::Plain) { std::string bucket_prefix; bool sniffed_as_target_document = false; if (resp_data.canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::HTML) { bucket_prefix = "SiteIsolation.XSD.HTML"; sniffed_as_target_document = SniffForHTML(data); } else if (resp_data.canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::XML) { bucket_prefix = "SiteIsolation.XSD.XML"; sniffed_as_target_document = SniffForXML(data); } else if (resp_data.canonical_mime_type == SiteIsolationPolicy::ResponseMetaData::JSON) { bucket_prefix = "SiteIsolation.XSD.JSON"; sniffed_as_target_document = SniffForJSON(data); } else { NOTREACHED() << "Not a blockable mime type: " << resp_data.canonical_mime_type; } if (sniffed_as_target_document) { is_blocked = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, false); } else { if (resp_data.no_sniff) { is_blocked = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, true); } else { HistogramCountNotBlockedResponse(bucket_prefix, sniffed_as_js); } } } else { // This block is for plain text documents. We apply our HTML, XML, // and JSON sniffer to a text document in the order, and block it // if any of them succeeds in sniffing. std::string bucket_prefix; if (SniffForHTML(data)) bucket_prefix = "SiteIsolation.XSD.Plain.HTML"; else if (SniffForXML(data)) bucket_prefix = "SiteIsolation.XSD.Plain.XML"; else if (SniffForJSON(data)) bucket_prefix = "SiteIsolation.XSD.Plain.JSON"; if (bucket_prefix.size() > 0) { is_blocked = true; HistogramCountBlockedResponse(bucket_prefix, resp_data, false); } else if (resp_data.no_sniff) { is_blocked = true; HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data, true); } else { HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain", sniffed_as_js); } } if (!CommandLine::ForCurrentProcess()->HasSwitch( switches::kBlockCrossSiteDocuments)) is_blocked = false; result_map[request_id] = is_blocked; if (is_blocked) { alternative_data->erase(); alternative_data->insert(0, " "); LOG(ERROR) << resp_data.response_url << " is blocked as an illegal cross-site document from " << resp_data.frame_origin; } return is_blocked; } void SiteIsolationPolicy::OnRequestComplete(int request_id) { if (!g_policy_enabled) return; g_metadata_map.Get().erase(request_id); g_result_map.Get().erase(request_id); } SiteIsolationPolicy::ResponseMetaData::CanonicalMimeType SiteIsolationPolicy::GetCanonicalMimeType(const std::string& mime_type) { if (LowerCaseEqualsASCII(mime_type, kTextHtml)) { return SiteIsolationPolicy::ResponseMetaData::HTML; } if (LowerCaseEqualsASCII(mime_type, kTextPlain)) { return SiteIsolationPolicy::ResponseMetaData::Plain; } if (LowerCaseEqualsASCII(mime_type, kAppJson) || LowerCaseEqualsASCII(mime_type, kTextJson) || LowerCaseEqualsASCII(mime_type, kTextXjson)) { return SiteIsolationPolicy::ResponseMetaData::JSON; } if (LowerCaseEqualsASCII(mime_type, kTextXml) || LowerCaseEqualsASCII(mime_type, xAppRssXml) || LowerCaseEqualsASCII(mime_type, kAppXml)) { return SiteIsolationPolicy::ResponseMetaData::XML; } return SiteIsolationPolicy::ResponseMetaData::Others; } bool SiteIsolationPolicy::IsBlockableScheme(const GURL& url) { // We exclude ftp:// from here. FTP doesn't provide a Content-Type // header which our policy depends on, so we cannot protect any // document from FTP servers. return url.SchemeIs("http") || url.SchemeIs("https"); } bool SiteIsolationPolicy::IsSameSite(const GURL& frame_origin, const GURL& response_url) { if (!frame_origin.is_valid() || !response_url.is_valid()) return false; if (frame_origin.scheme() != response_url.scheme()) return false; // SameDomainOrHost() extracts the effective domains (public suffix plus one) // from the two URLs and compare them. // TODO(dsjang): use INCLUDE_PRIVATE_REGISTRIES when http://crbug.com/7988 is // fixed. return net::registry_controlled_domains::SameDomainOrHost( frame_origin, response_url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); } // We don't use Webkit's existing CORS policy implementation since // their policy works in terms of origins, not sites. For example, // when frame is sub.a.com and it is not allowed to access a document // with sub1.a.com. But under Site Isolation, it's allowed. bool SiteIsolationPolicy::IsValidCorsHeaderSet( const GURL& frame_origin, const GURL& website_origin, const std::string& access_control_origin) { // Many websites are sending back "\"*\"" instead of "*". This is // non-standard practice, and not supported by Chrome. Refer to // CrossOriginAccessControl::passesAccessControlCheck(). // TODO(dsjang): * is not allowed for the response from a request // with cookies. This allows for more than what the renderer will // eventually be able to receive, so we won't see illegal cross-site // documents allowed by this. We have to find a way to see if this // response is from a cookie-tagged request or not in the future. if (access_control_origin == "*") return true; // TODO(dsjang): The CORS spec only treats a fully specified URL, except for // "*", but many websites are using just a domain for access_control_origin, // and this is blocked by Webkit's CORS logic here : // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set // is_valid() to false when it is created from a URL containing * in the // domain part. GURL cors_origin(access_control_origin); return IsSameSite(frame_origin, cors_origin); } // This function is a slight modification of |net::SniffForHTML|. bool SiteIsolationPolicy::SniffForHTML(StringPiece data) { // The content sniffer used by Chrome and Firefox are using "<!--" // as one of the HTML signatures, but it also appears in valid // JavaScript, considered as well-formed JS by the browser. Since // we do not want to block any JS, we exclude it from our HTML // signatures. This can weaken our document block policy, but we can // break less websites. // TODO(dsjang): parameterize |net::SniffForHTML| with an option // that decides whether to include <!-- or not, so that we can // remove this function. // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser // process, we should do single-thread checking here for the static // initializer. static const StringPiece kHtmlSignatures[] = { StringPiece("<!DOCTYPE html"), // HTML5 spec StringPiece("<script"), // HTML5 spec, Mozilla StringPiece("<html"), // HTML5 spec, Mozilla StringPiece("<head"), // HTML5 spec, Mozilla StringPiece("<iframe"), // Mozilla StringPiece("<h1"), // Mozilla StringPiece("<div"), // Mozilla StringPiece("<font"), // Mozilla StringPiece("<table"), // Mozilla StringPiece("<a"), // Mozilla StringPiece("<style"), // Mozilla StringPiece("<title"), // Mozilla StringPiece("<b"), // Mozilla StringPiece("<body"), // Mozilla StringPiece("<br"), // Mozilla StringPiece("<p"), // Mozilla StringPiece("<?xml") // Mozilla }; while (data.length() > 0) { if (MatchesSignature( data, kHtmlSignatures, arraysize(kHtmlSignatures))) return true; // If we cannot find "<!--", we fail sniffing this as HTML. static const StringPiece kCommentBegins[] = { StringPiece("<!--") }; if (!MatchesSignature(data, kCommentBegins, arraysize(kCommentBegins))) break; // Search for --> and do SniffForHTML after that. If we can find the // comment's end, we start HTML sniffing from there again. static const char kEndComment[] = "-->"; size_t offset = data.find(kEndComment); if (offset == base::StringPiece::npos) break; // Proceed to the index next to the ending comment (-->). data.remove_prefix(offset + strlen(kEndComment)); } return false; } bool SiteIsolationPolicy::SniffForXML(base::StringPiece data) { // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for // this signature. However, XML is case-sensitive. Don't we have to // be more lenient only to block documents starting with the exact // string <?xml rather than <?XML ? // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser // process, we should do single-thread checking here for the static // initializer. static const StringPiece kXmlSignatures[] = { StringPiece("<?xml") }; return MatchesSignature(data, kXmlSignatures, arraysize(kXmlSignatures)); } bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data) { // TODO(dsjang): We have to come up with a better way to sniff // JSON. However, even RE cannot help us that much due to the fact // that we don't do full parsing. This DFA starts with state 0, and // finds {, "/' and : in that order. We're avoiding adding a // dependency on a regular expression library. enum { kStartState, kLeftBraceState, kLeftQuoteState, kColonState, kTerminalState, } state = kStartState; size_t length = data.length(); for (size_t i = 0; i < length && state < kColonState; ++i) { const char c = data[i]; if (c == ' ' || c == '\t' || c == '\r' || c == '\n') continue; switch (state) { case kStartState: if (c == '{') state = kLeftBraceState; else state = kTerminalState; break; case kLeftBraceState: if (c == '\"' || c == '\'') state = kLeftQuoteState; else state = kTerminalState; break; case kLeftQuoteState: if (c == ':') state = kColonState; break; case kColonState: case kTerminalState: NOTREACHED(); break; } } return state == kColonState; } bool SiteIsolationPolicy::SniffForJS(StringPiece data) { // TODO(dsjang): This is a real hack. The only purpose of this function is to // try to see if there's any possibility that this data can be JavaScript // (superset of JS). This function will be removed once UMA stats are // gathered. // Search for "var " for JS detection. return data.find("var ") != base::StringPiece::npos; } } // namespace content