// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/omnibox/search_suggestion_parser.h"
#include "base/i18n/icu_string_conversions.h"
#include "base/json/json_string_value_serializer.h"
#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/values.h"
#include "components/omnibox/autocomplete_input.h"
#include "components/omnibox/url_prefix.h"
#include "components/url_fixer/url_fixer.h"
#include "net/base/net_util.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_fetcher.h"
#include "url/url_constants.h"
namespace {

// Maps a suggestion-type token (as read from the "google:suggesttype" list of
// a suggest response) to the matching AutocompleteMatchType.  Tokens that are
// not listed in the table below fall back to SEARCH_SUGGEST.
AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
  struct KnownType {
    const char* token;
    AutocompleteMatchType::Type match_type;
  };
  static const KnownType kKnownTypes[] = {
    { "ENTITY", AutocompleteMatchType::SEARCH_SUGGEST_ENTITY },
    { "INFINITE", AutocompleteMatchType::SEARCH_SUGGEST_INFINITE },
    { "PERSONALIZED_QUERY",
      AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED },
    { "PROFILE", AutocompleteMatchType::SEARCH_SUGGEST_PROFILE },
    { "NAVIGATION", AutocompleteMatchType::NAVSUGGEST },
    { "PERSONALIZED_NAVIGATION",
      AutocompleteMatchType::NAVSUGGEST_PERSONALIZED },
  };
  const size_t known_type_count = sizeof(kKnownTypes) / sizeof(kKnownTypes[0]);
  for (size_t i = 0; i < known_type_count; ++i) {
    if (type == kKnownTypes[i].token)
      return kKnownTypes[i].match_type;
  }
  return AutocompleteMatchType::SEARCH_SUGGEST;
}

}  // namespace
// SearchSuggestionParser::Result ----------------------------------------------
// Stores the attributes shared by every kind of parsed result.  A freshly
// constructed result always starts with |received_after_last_keystroke_| set
// to true; everything else is copied from the arguments.
SearchSuggestionParser::Result::Result(
    bool from_keyword_provider,
    int relevance,
    bool relevance_from_server,
    AutocompleteMatchType::Type type,
    const std::string& deletion_url)
    : from_keyword_provider_(from_keyword_provider),
      type_(type),
      relevance_(relevance),
      relevance_from_server_(relevance_from_server),
      received_after_last_keystroke_(true),
      deletion_url_(deletion_url) {
}
// Out-of-line destructor with an empty body; no explicit cleanup is performed
// here.
SearchSuggestionParser::Result::~Result() {}
// SearchSuggestionParser::SuggestResult ---------------------------------------
// Builds a query-suggestion result.  The shared attributes are forwarded to
// the Result base; the suggestion-specific strings are copied into the
// matching members.  |match_contents| must be non-empty.  Once stored, the
// contents are classified against |input_text| with whole-string bolding
// allowed.
SearchSuggestionParser::SuggestResult::SuggestResult(
    const base::string16& suggestion,
    AutocompleteMatchType::Type type,
    const base::string16& match_contents,
    const base::string16& match_contents_prefix,
    const base::string16& annotation,
    const base::string16& answer_contents,
    const base::string16& answer_type,
    const std::string& suggest_query_params,
    const std::string& deletion_url,
    bool from_keyword_provider,
    int relevance,
    bool relevance_from_server,
    bool should_prefetch,
    const base::string16& input_text)
    : Result(from_keyword_provider, relevance, relevance_from_server, type,
             deletion_url),
      suggestion_(suggestion),
      match_contents_prefix_(match_contents_prefix),
      annotation_(annotation),
      suggest_query_params_(suggest_query_params),
      answer_contents_(answer_contents),
      answer_type_(answer_type),
      should_prefetch_(should_prefetch) {
  // |match_contents_| is assigned in the body rather than in the initializer
  // list above.
  match_contents_ = match_contents;
  DCHECK(!match_contents_.empty());

  const bool kAllowBoldingAll = true;
  ClassifyMatchContents(kAllowBoldingAll, input_text);
}
// Out-of-line destructor with an empty body; no explicit cleanup is performed
// here.
SearchSuggestionParser::SuggestResult::~SuggestResult() {}
// Recomputes |match_contents_class_| so that the part of |match_contents_|
// the user did NOT type is bolded (MATCH) and the typed part is left plain
// (NONE).
//
// |allow_bolding_all| - when false and the typed text cannot be located in
//                       |match_contents_|, the existing classification is left
//                       untouched instead of bolding the entire string.
// |input_text|        - the text to look for.  When empty (zero-suggest) a
//                       single NONE classification is appended and nothing
//                       else happens.
void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
    const bool allow_bolding_all,
    const base::string16& input_text) {
  // In case of zero-suggest results, do not highlight matches.
  if (input_text.empty()) {
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::NONE));
    return;
  }

  // For "infinite" suggestions |match_contents_| may only be the tail of the
  // full query.  If the query begins with the input, ends with the contents
  // and the input reaches into the contents, search only for the overlapping
  // tail of the input.
  base::string16 needle = input_text;
  if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
    const size_t contents_offset =
        suggestion_.length() - match_contents_.length();
    const bool input_overlaps_contents =
        StartsWith(suggestion_, input_text, true) &&
        EndsWith(suggestion_, match_contents_, true) &&
        (input_text.length() > contents_offset);
    if (input_overlaps_contents)
      needle = input_text.substr(contents_offset);
  }

  const size_t needle_position = match_contents_.find(needle);
  const bool needle_found = (needle_position != base::string16::npos);

  // Without a hit the code below would bold the whole string.  If the caller
  // disallows that, keep whatever classification is already present (it may
  // already be fully bolded).
  if (!needle_found && !allow_bolding_all)
    return;

  match_contents_class_.clear();

  // A verbatim (what-you-typed) match, for either the default provider or a
  // keyword search provider, gets no highlighting at all.
  if (input_text == match_contents_) {
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::NONE));
    return;
  }

  // The input is not a substring of the contents (e.g. input "slasdot" versus
  // contents "slashdot"), so everything is bolded.
  if (!needle_found) {
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::MATCH));
    return;
  }

  // Intra-string highlighting: for input "you" and contents "youtube" the
  // "tube" part is bolded.  Only the first occurrence of the input is left
  // plain; marking every occurrence of a possibly single-character input
  // (e.g. both "s" in "southwest airlines") looks odd.
  if (needle_position > 0) {
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::MATCH));
  }
  match_contents_class_.push_back(
      ACMatchClassification(needle_position, ACMatchClassification::NONE));
  const size_t tail_position = needle_position + needle.length();
  if (tail_position < match_contents_.length()) {
    match_contents_class_.push_back(
        ACMatchClassification(tail_position, ACMatchClassification::MATCH));
  }
}
// Returns the locally computed relevance for this suggestion:
//   100 when a keyword provider was requested but this result did not come
//       from it,
//   300 when the input is classified as a URL,
//   600 otherwise.
int SearchSuggestionParser::SuggestResult::CalculateRelevance(
    const AutocompleteInput& input,
    bool keyword_provider_requested) const {
  if (keyword_provider_requested && !from_keyword_provider_)
    return 100;

  if (input.type() == metrics::OmniboxInputType::URL)
    return 300;

  return 600;
}
// SearchSuggestionParser::NavigationResult ------------------------------------
// Builds a navigation (URL) suggestion result.  |url| must be valid.
// |formatted_url_| is derived from |url| via net::FormatUrl() using every
// omission flag except kFormatUrlOmitHTTP and is then passed through
// AutocompleteInput::FormattedStringWithEquivalentMeaning().  The match
// contents are computed and classified against |input_text| afterwards, with
// "bold nothing" allowed.
SearchSuggestionParser::NavigationResult::NavigationResult(
    const AutocompleteSchemeClassifier& scheme_classifier,
    const GURL& url,
    AutocompleteMatchType::Type type,
    const base::string16& description,
    const std::string& deletion_url,
    bool from_keyword_provider,
    int relevance,
    bool relevance_from_server,
    const base::string16& input_text,
    const std::string& languages)
    : Result(from_keyword_provider,
             relevance,
             relevance_from_server,
             type,
             deletion_url),
      url_(url),
      formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
          url,
          net::FormatUrl(url,
                         languages,
                         net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
                         net::UnescapeRule::SPACES,
                         NULL,
                         NULL,
                         NULL),
          scheme_classifier)),
      description_(description) {
  DCHECK(url_.is_valid());

  const bool kAllowBoldingNothing = true;
  CalculateAndClassifyMatchContents(kAllowBoldingNothing, input_text,
                                    languages);
}
// Out-of-line destructor with an empty body; no explicit cleanup is performed
// here.
SearchSuggestionParser::NavigationResult::~NavigationResult() {}
// Recomputes |match_contents_| (the displayed form of |url_|) and its
// classification so that the portion matching |input_text| is highlighted.
//
// |allow_bolding_nothing| - when false and |input_text| cannot be located in
//                           the formatted URL, both |match_contents_| and
//                           |match_contents_class_| are left untouched.
// |input_text|            - the text to highlight.  When empty (zero-suggest)
//                           a single NONE classification is appended and
//                           nothing else happens.
// |languages|             - forwarded to net::FormatUrl().
void
SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
    const bool allow_bolding_nothing,
    const base::string16& input_text,
    const std::string& languages) {
  // In case of zero-suggest results, do not highlight matches.
  if (input_text.empty()) {
    match_contents_class_.push_back(
        ACMatchClassification(0, ACMatchClassification::NONE));
    return;
  }

  // Search the formatted URL with its scheme still in place first, so that a
  // match at the very start of the scheme can be detected.
  const URLPrefix* best_prefix =
      URLPrefix::BestURLPrefix(formatted_url_, input_text);
  size_t match_start;
  if (best_prefix != NULL)
    match_start = best_prefix->prefix.length();
  else
    match_start = formatted_url_.find(input_text);

  // Drop "http://" from the display unless the user typed an HTTP scheme or
  // the best prefix match begins at offset zero.
  const bool input_has_http_scheme =
      AutocompleteInput::HasHTTPScheme(input_text);
  const bool match_at_prefix_start = best_prefix && (match_start == 0);
  const bool trim_http = !input_has_http_scheme && !match_at_prefix_start;

  net::FormatUrlTypes format_types = net::kFormatUrlOmitAll;
  if (!trim_http)
    format_types &= ~net::kFormatUrlOmitHTTP;

  // FormatUrl() adjusts |match_start| to the offset in the formatted output.
  base::string16 formatted_contents =
      net::FormatUrl(url_, languages, format_types, net::UnescapeRule::SPACES,
                     NULL, NULL, &match_start);

  // If the first match in the untrimmed string was inside a scheme that got
  // trimmed, look for a subsequent match.
  if (match_start == base::string16::npos)
    match_start = formatted_contents.find(input_text);

  // Leave the members alone when there is no match and the caller does not
  // accept an unhighlighted result.
  if (!allow_bolding_nothing && (match_start == base::string16::npos))
    return;

  match_contents_ = formatted_contents;
  // Safe if |match_start| is npos; also safe if the input is longer than the
  // remaining contents after |match_start|.
  AutocompleteMatch::ClassifyLocationInString(
      match_start, input_text.length(), match_contents_.length(),
      ACMatchClassification::URL, &match_contents_class_);
}
// Returns the locally computed relevance for this navigation result: 150 when
// a keyword provider was requested but this result did not come from it, and
// 800 in every other case.  |input| is not consulted.
int SearchSuggestionParser::NavigationResult::CalculateRelevance(
    const AutocompleteInput& input,
    bool keyword_provider_requested) const {
  if (keyword_provider_requested && !from_keyword_provider_)
    return 150;
  return 800;
}
// SearchSuggestionParser::Results ---------------------------------------------
// Starts with no server-provided verbatim relevance (-1), no triggered field
// trial and no server-provided relevances.
SearchSuggestionParser::Results::Results()
    : verbatim_relevance(-1),
      field_trial_triggered(false),
      relevances_from_server(false) {
}
// Out-of-line destructor with an empty body; no explicit cleanup is performed
// here.
SearchSuggestionParser::Results::~Results() {}
// Drops all parsed suggestions and the stored metadata, and resets the
// verbatim relevance to its "not provided" value of -1.
// NOTE(review): |answers_image_urls|, |field_trial_triggered| and
// |relevances_from_server| are not reset here -- confirm that is intentional.
void SearchSuggestionParser::Results::Clear() {
  verbatim_relevance = -1;
  metadata.clear();

  navigation_results.clear();
  suggest_results.clear();
}
bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
if (verbatim_relevance >= 0)
return true;
// Right now either all results of one type will be server-scored or they will
// all be locally scored, but in case we change this later, we'll just check
// them all.
for (SuggestResults::const_iterator i(suggest_results.begin());
i != suggest_results.end(); ++i) {
if (i->relevance_from_server())
return true;
}
for (NavigationResults::const_iterator i(navigation_results.begin());
i != navigation_results.end(); ++i) {
if (i->relevance_from_server())
return true;
}
return false;
}
// SearchSuggestionParser ------------------------------------------------------
// static
// Returns the response body of |source| as a string.  If the response headers
// name a charset and the body can be converted from that charset, the returned
// string is the UTF-8 re-encoding of the body; otherwise the body is returned
// unchanged.
std::string SearchSuggestionParser::ExtractJsonData(
    const net::URLFetcher* source) {
  const net::HttpResponseHeaders* const headers = source->GetResponseHeaders();
  std::string body;
  source->GetResponseAsString(&body);

  // JSON is supposed to be UTF-8, but some suggest service providers send
  // JSON files in non-UTF-8 encodings. The actual encoding is usually
  // specified in the Content-Type header field.
  if (!headers)
    return body;

  std::string charset;
  if (!headers->GetCharset(&charset))
    return body;

  // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
  base::string16 body_utf16;
  const bool converted =
      base::CodepageToUTF16(body, charset.c_str(),
                            base::OnStringConversionError::FAIL, &body_utf16);
  if (converted)
    body = base::UTF16ToUTF8(body_utf16);

  return body;
}
// static
// Tries to parse |json_data| as JSON (trailing commas allowed), tolerating
// leading text before the array.  Everything before the first '[' is
// discarded and parsing is attempted; after a failure the text is cut at the
// next '[' and parsing is retried.  At most five attempts are made.  Returns
// the parsed value, or a null scoped_ptr if no attempt succeeded.
scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
    std::string json_data) {
  const size_t kMaxAttempts = 5;

  // The JSON response should be an array.
  size_t array_start = json_data.find("[");
  for (size_t attempt = 0;
       attempt < kMaxAttempts && array_start != std::string::npos;
       ++attempt) {
    // Remove any XSSI guards to allow for JSON parsing.
    if (array_start > 0)
      json_data.erase(0, array_start);

    JSONStringValueSerializer deserializer(json_data);
    deserializer.set_allow_trailing_comma(true);
    int error_code = 0;
    scoped_ptr<base::Value> parsed(deserializer.Deserialize(&error_code, NULL));
    if (error_code == 0)
      return parsed.Pass();

    // |json_data| now begins with '['; skip it when looking for the next
    // candidate start.
    array_start = json_data.find("[", 1);
  }
  return scoped_ptr<base::Value>();
}
// static
// Populates |results| from an already-deserialized suggest response.
//
// |root_val| must be a list whose first element is a string equal to
// |input.text()| and whose second element is the list of suggestions;
// otherwise false is returned and |results| is left untouched.  The optional
// third element supplies descriptions (used as titles for navigation
// suggestions) and the optional fifth element is a dictionary of extras
// (suggestion types, relevances, verbatim relevance, field-trial flag,
// client data and per-suggestion details).
//
// |input|                    - the user input this response belongs to.
// |scheme_classifier|        - forwarded to each NavigationResult.
// |default_result_relevance| - relevance used when the response carries no
//                              usable relevance list.
// |languages|                - forwarded to each NavigationResult.
// |is_keyword_result|        - marks every produced result as coming from the
//                              keyword provider.
// |results|                  - output; its previous suggest/navigation results
//                              and answer image URLs are replaced.
//
// Returns true when the response had the expected outer shape.
bool SearchSuggestionParser::ParseSuggestResults(
    const base::Value& root_val,
    const AutocompleteInput& input,
    const AutocompleteSchemeClassifier& scheme_classifier,
    int default_result_relevance,
    const std::string& languages,
    bool is_keyword_result,
    Results* results) {
  base::string16 query;
  const base::ListValue* root_list = NULL;
  const base::ListValue* results_list = NULL;

  if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
      query != input.text() || !root_list->GetList(1, &results_list))
    return false;

  // 3rd element: Description list.
  const base::ListValue* descriptions = NULL;
  root_list->GetList(2, &descriptions);

  // 4th element: Disregard the query URL list for now.

  // Reset suggested relevance information.
  results->verbatim_relevance = -1;

  // 5th element: Optional key-value pairs from the Suggest server.
  const base::ListValue* types = NULL;
  const base::ListValue* relevances = NULL;
  const base::ListValue* suggestion_details = NULL;
  const base::DictionaryValue* extras = NULL;
  int prefetch_index = -1;
  if (root_list->GetDictionary(4, &extras)) {
    extras->GetList("google:suggesttype", &types);

    // Discard this list if its size does not match that of the suggestions.
    if (extras->GetList("google:suggestrelevance", &relevances) &&
        (relevances->GetSize() != results_list->GetSize()))
      relevances = NULL;
    extras->GetInteger("google:verbatimrelevance",
                       &results->verbatim_relevance);

    // Check if the active suggest field trial (if any) has triggered either
    // for the default provider or keyword provider.
    results->field_trial_triggered = false;
    extras->GetBoolean("google:fieldtrialtriggered",
                       &results->field_trial_triggered);

    const base::DictionaryValue* client_data = NULL;
    if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
      client_data->GetInteger("phi", &prefetch_index);

    // As with relevances, details are only usable when there is exactly one
    // entry per suggestion.
    if (extras->GetList("google:suggestdetail", &suggestion_details) &&
        suggestion_details->GetSize() != results_list->GetSize())
      suggestion_details = NULL;

    // Store the metadata that came with the response in case we need to pass
    // it along with the prefetch query to Instant.
    JSONStringValueSerializer json_serializer(&results->metadata);
    json_serializer.Serialize(*extras);
  }

  // Clear the previous results now that new results are available.
  results->suggest_results.clear();
  results->navigation_results.clear();
  results->answers_image_urls.clear();

  base::string16 suggestion;
  std::string type;
  int relevance = default_result_relevance;
  // Prohibit navsuggest in FORCED_QUERY mode.  Users wants queries, not URLs.
  const bool allow_navsuggest =
      input.type() != metrics::OmniboxInputType::FORCED_QUERY;
  const base::string16& trimmed_input =
      base::CollapseWhitespace(input.text(), false);
  for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
    // Google search may return empty suggestions for weird input characters,
    // they make no sense at all and can cause problems in our code.
    if (suggestion.empty())
      continue;

    // Apply valid suggested relevance scores; discard invalid lists.
    if (relevances != NULL && !relevances->GetInteger(index, &relevance))
      relevances = NULL;
    AutocompleteMatchType::Type match_type =
        AutocompleteMatchType::SEARCH_SUGGEST;
    if (types && types->GetString(index, &type))
      match_type = GetAutocompleteMatchType(type);
    const base::DictionaryValue* suggestion_detail = NULL;
    std::string deletion_url;

    if (suggestion_details &&
        suggestion_details->GetDictionary(index, &suggestion_detail))
      suggestion_detail->GetString("du", &deletion_url);

    if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
        (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
      // Do not blindly trust the URL coming from the server to be valid.
      GURL url(
          url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
      if (url.is_valid() && allow_navsuggest) {
        base::string16 title;
        if (descriptions != NULL)
          descriptions->GetString(index, &title);
        results->navigation_results.push_back(NavigationResult(
            scheme_classifier, url, match_type, title, deletion_url,
            is_keyword_result, relevance, relevances != NULL, input.text(),
            languages));
      }
    } else {
      // The emptiness check at the top of the loop runs before whitespace is
      // collapsed, so a whitespace-only suggestion would otherwise reach
      // SuggestResult with empty contents and violate its non-empty DCHECK.
      // Treat such a suggestion like an empty one.
      const base::string16 collapsed_suggestion =
          base::CollapseWhitespace(suggestion, false);
      if (collapsed_suggestion.empty())
        continue;

      base::string16 match_contents = suggestion;
      base::string16 match_contents_prefix;
      base::string16 annotation;
      base::string16 answer_contents;
      base::string16 answer_type;
      std::string suggest_query_params;

      // |suggestion_detail| was looked up above and is NULL when there is no
      // usable detail dictionary for this index.
      if (suggestion_detail) {
        suggestion_detail->GetString("t", &match_contents);
        suggestion_detail->GetString("mp", &match_contents_prefix);
        suggestion_detail->GetString("a", &annotation);
        suggestion_detail->GetString("q", &suggest_query_params);

        // Extract Answers, if provided.
        const base::DictionaryValue* answer_json = NULL;
        if (suggestion_detail->GetDictionary("ansa", &answer_json)) {
          match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
          GetAnswersImageURLs(answer_json, &results->answers_image_urls);
          std::string contents;
          base::JSONWriter::Write(answer_json, &contents);
          answer_contents = base::UTF8ToUTF16(contents);
          suggestion_detail->GetString("ansb", &answer_type);
        }
      }

      // Error correction for bad data from server: if the displayed contents
      // are empty once whitespace is collapsed, show the suggestion itself.
      base::string16 collapsed_contents =
          base::CollapseWhitespace(match_contents, false);
      if (collapsed_contents.empty())
        collapsed_contents = collapsed_suggestion;

      bool should_prefetch = static_cast<int>(index) == prefetch_index;
      // TODO(kochi): Improve calculator suggestion presentation.
      results->suggest_results.push_back(SuggestResult(
          collapsed_suggestion, match_type, collapsed_contents,
          match_contents_prefix, annotation, answer_contents, answer_type,
          suggest_query_params, deletion_url, is_keyword_result, relevance,
          relevances != NULL, should_prefetch, trimmed_input));
    }
  }
  results->relevances_from_server = relevances != NULL;
  return true;
}
// static
// Appends to |urls| one https URL for each entry of the "l" list in
// |answer_json| that is a dictionary with a non-empty string at path
// "il.i.d".  Entries that lack the string or that yield an invalid URL are
// skipped.  |answer_json| must not be NULL.
void SearchSuggestionParser::GetAnswersImageURLs(
    const base::DictionaryValue* answer_json,
    std::vector<GURL>* urls) {
  DCHECK(answer_json);

  const base::ListValue* answer_lines = NULL;
  const bool has_lines = answer_json->GetList("l", &answer_lines) &&
                         answer_lines && answer_lines->GetSize() != 0;
  if (!has_lines)
    return;

  const std::string scheme_with_separator =
      std::string(url::kHttpsScheme) + ":";
  base::ListValue::const_iterator line_it = answer_lines->begin();
  for (; line_it != answer_lines->end(); ++line_it) {
    const base::DictionaryValue* line_dict = NULL;
    if (!(*line_it)->GetAsDictionary(&line_dict) || !line_dict)
      continue;

    std::string host_and_path;
    const bool has_image = line_dict->GetString("il.i.d", &host_and_path) &&
                           !host_and_path.empty();
    if (!has_image)
      continue;

    // Concatenate scheme and host/path using only ':' as separator.  This is
    // due to the results delivering strings of the form '//host/path', which
    // is web-speak for "use the enclosing page's scheme", but not a valid path
    // of an URL.
    const GURL image_url(scheme_with_separator + host_and_path);
    if (image_url.is_valid())
      urls->push_back(image_url);
  }
}