// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/autofill/form_structure.h"

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/sha1.h"
#include "base/string_number_conversions.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/autofill/autofill_metrics.h"
#include "chrome/browser/autofill/autofill_xml_parser.h"
#include "chrome/browser/autofill/field_types.h"
#include "chrome/browser/autofill/form_field.h"
#include "third_party/libjingle/source/talk/xmllite/xmlelement.h"
#include "webkit/glue/form_field.h"

using webkit_glue::FormData;

namespace {

const char kFormMethodPost[] = "post";

// XML elements and attributes.
const char kAttributeAcceptedFeatures[] = "accepts";
const char kAttributeAutofillUsed[] = "autofillused";
const char kAttributeAutofillType[] = "autofilltype";
const char kAttributeClientVersion[] = "clientversion";
const char kAttributeDataPresent[] = "datapresent";
const char kAttributeFormSignature[] = "formsignature";
const char kAttributeSignature[] = "signature";
const char kAcceptedFeatures[] = "e"; // e=experiments
const char kClientVersion[] = "6.1.1715.1442/en (GGLL)";
const char kXMLDeclaration[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
const char kXMLElementAutofillQuery[] = "autofillquery";
const char kXMLElementAutofillUpload[] = "autofillupload";
const char kXMLElementForm[] = "form";
const char kXMLElementField[] = "field";

// The number of fillable fields necessary for a form to be fillable.
#ifdef ANDROID
// Try and autofill more forms on Android, as filling out forms is
// more frustrating on a mobile device.
const size_t kRequiredFillableFields = 2;
#else
const size_t kRequiredFillableFields = 3;
#endif

}  // namespace

FormStructure::FormStructure(const FormData& form)
    : form_name_(form.name),
      source_url_(form.origin),
      target_url_(form.action),
      has_credit_card_field_(false),
      has_autofillable_field_(false),
      has_password_fields_(false),
      autofill_count_(0) {
  // Copy the form fields.
  std::vector<webkit_glue::FormField>::const_iterator field;
  for (field = form.fields.begin();
       field != form.fields.end(); field++) {
    // Add all supported form fields (including with empty names) to the
    // signature.  This is a requirement for Autofill servers.
    form_signature_field_names_.append("&");
    form_signature_field_names_.append(UTF16ToUTF8(field->name));

    // Generate a unique name for this field by appending a counter to the name.
    string16 unique_name = field->name +
        base::IntToString16(fields_.size() + 1);
    fields_.push_back(new AutofillField(*field, unique_name));
  }

  // Terminate the vector with a NULL item.
  fields_.push_back(NULL);

  std::string method = UTF16ToUTF8(form.method);
  if (StringToLowerASCII(method) == kFormMethodPost) {
    method_ = POST;
  } else {
    // Either the method is 'get', or we don't know.  In this case we default
    // to GET.
    method_ = GET;
  }
}

FormStructure::~FormStructure() {}

void FormStructure::DetermineHeuristicTypes() {
  has_credit_card_field_ = false;
  has_autofillable_field_ = false;
  autofill_count_ = 0;

  FieldTypeMap field_type_map;
  GetHeuristicFieldInfo(&field_type_map);

  for (size_t index = 0; index < field_count(); index++) {
    AutofillField* field = fields_[index];
    DCHECK(field);
    FieldTypeMap::iterator iter = field_type_map.find(field->unique_name());

    AutofillFieldType heuristic_autofill_type;
    if (iter == field_type_map.end()) {
      heuristic_autofill_type = UNKNOWN_TYPE;
    } else {
      heuristic_autofill_type = iter->second;
      ++autofill_count_;
    }

    field->set_heuristic_type(heuristic_autofill_type);

    AutofillType autofill_type(field->type());
    if (autofill_type.group() == AutofillType::CREDIT_CARD)
      has_credit_card_field_ = true;
    if (autofill_type.field_type() != UNKNOWN_TYPE)
      has_autofillable_field_ = true;
  }
}

bool FormStructure::EncodeUploadRequest(bool autofill_used,
                                        std::string* encoded_xml) const {
  DCHECK(encoded_xml);
  encoded_xml->clear();
  bool autofillable = ShouldBeParsed(true);
  DCHECK(autofillable);  // Caller should've checked for search pages.
  if (!autofillable)
    return false;

  // Set up the <autofillupload> element and its attributes.
  buzz::XmlElement autofill_request_xml(
      (buzz::QName(kXMLElementAutofillUpload)));
  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
                               kClientVersion);
  autofill_request_xml.SetAttr(buzz::QName(kAttributeFormSignature),
                               FormSignature());
  autofill_request_xml.SetAttr(buzz::QName(kAttributeAutofillUsed),
                               autofill_used ? "true" : "false");
  autofill_request_xml.SetAttr(buzz::QName(kAttributeDataPresent),
                               ConvertPresenceBitsToString().c_str());

  if (!EncodeFormRequest(FormStructure::UPLOAD, &autofill_request_xml))
    return false;  // Malformed form, skip it.

  // Obtain the XML structure as a string.
  *encoded_xml = kXMLDeclaration;
  *encoded_xml += autofill_request_xml.Str().c_str();

  return true;
}

// static
bool FormStructure::EncodeQueryRequest(const ScopedVector<FormStructure>& forms,
    std::vector<std::string>* encoded_signatures,
    std::string* encoded_xml) {
  DCHECK(encoded_signatures);
  DCHECK(encoded_xml);
  encoded_xml->clear();
  encoded_signatures->clear();
  encoded_signatures->reserve(forms.size());

  // Set up the <autofillquery> element and attributes.
  buzz::XmlElement autofill_request_xml(
      (buzz::QName(kXMLElementAutofillQuery)));
  autofill_request_xml.SetAttr(buzz::QName(kAttributeClientVersion),
                               kClientVersion);
  autofill_request_xml.SetAttr(buzz::QName(kAttributeAcceptedFeatures),
                               kAcceptedFeatures);

  // Some badly formatted web sites repeat forms - detect that and encode only
  // one form as returned data would be the same for all the repeated forms.
  std::set<std::string> processed_forms;
  for (ScopedVector<FormStructure>::const_iterator it = forms.begin();
       it != forms.end();
       ++it) {
    std::string signature((*it)->FormSignature());
    if (processed_forms.find(signature) != processed_forms.end())
      continue;
    processed_forms.insert(signature);
    scoped_ptr<buzz::XmlElement> encompassing_xml_element(
        new buzz::XmlElement(buzz::QName(kXMLElementForm)));
    encompassing_xml_element->SetAttr(buzz::QName(kAttributeSignature),
                                      signature);

    if (!(*it)->EncodeFormRequest(FormStructure::QUERY,
                                  encompassing_xml_element.get()))
      continue;  // Malformed form, skip it.

    autofill_request_xml.AddElement(encompassing_xml_element.release());
    encoded_signatures->push_back(signature);
  }

  if (!encoded_signatures->size())
    return false;

  // Obtain the XML structure as a string.
  *encoded_xml = kXMLDeclaration;
  *encoded_xml += autofill_request_xml.Str().c_str();

  return true;
}

// static
void FormStructure::ParseQueryResponse(const std::string& response_xml,
                                       const std::vector<FormStructure*>& forms,
                                       UploadRequired* upload_required,
                                       const AutofillMetrics& metric_logger) {
  metric_logger.Log(AutofillMetrics::QUERY_RESPONSE_RECEIVED);

  // Parse the field types from the server response to the query.
  std::vector<AutofillFieldType> field_types;
  std::string experiment_id;
  AutofillQueryXmlParser parse_handler(&field_types, upload_required,
                                       &experiment_id);
  buzz::XmlParser parser(&parse_handler);
  parser.Parse(response_xml.c_str(), response_xml.length(), true);
  if (!parse_handler.succeeded())
    return;

  metric_logger.Log(AutofillMetrics::QUERY_RESPONSE_PARSED);

  bool heuristics_detected_fillable_field = false;
  bool query_response_overrode_heuristics = false;

  // Copy the field types into the actual form.
  std::vector<AutofillFieldType>::iterator current_type = field_types.begin();
  for (std::vector<FormStructure*>::const_iterator iter = forms.begin();
       iter != forms.end(); ++iter) {
    FormStructure* form = *iter;
    form->server_experiment_id_ = experiment_id;

    if (form->has_autofillable_field_)
      heuristics_detected_fillable_field = true;

    form->has_credit_card_field_ = false;
    form->has_autofillable_field_ = false;

    for (std::vector<AutofillField*>::iterator field = form->fields_.begin();
         field != form->fields_.end(); ++field, ++current_type) {
      // The field list is terminated by a NULL AutofillField.
      if (!*field)
        break;

      // In some cases *successful* response does not return all the fields.
      // Quit the update of the types then.
      if (current_type == field_types.end())
        break;

      // UNKNOWN_TYPE is reserved for use by the client.
      DCHECK_NE(*current_type, UNKNOWN_TYPE);

      AutofillFieldType heuristic_type = (*field)->type();
      (*field)->set_server_type(*current_type);
      if (heuristic_type != (*field)->type())
        query_response_overrode_heuristics = true;

      AutofillType autofill_type((*field)->type());
      if (autofill_type.group() == AutofillType::CREDIT_CARD)
        form->has_credit_card_field_ = true;
      if (autofill_type.field_type() != UNKNOWN_TYPE)
        form->has_autofillable_field_ = true;
    }

    form->UpdateAutofillCount();
  }

  AutofillMetrics::ServerQueryMetric metric;
  if (query_response_overrode_heuristics) {
    if (heuristics_detected_fillable_field) {
      metric = AutofillMetrics::QUERY_RESPONSE_OVERRODE_LOCAL_HEURISTICS;
    } else {
      metric = AutofillMetrics::QUERY_RESPONSE_WITH_NO_LOCAL_HEURISTICS;
    }
  } else {
    metric = AutofillMetrics::QUERY_RESPONSE_MATCHED_LOCAL_HEURISTICS;
  }
  metric_logger.Log(metric);
}

std::string FormStructure::FormSignature() const {
  std::string scheme(target_url_.scheme());
  std::string host(target_url_.host());

  // If target host or scheme is empty, set scheme and host of source url.
  // This is done to match the Toolbar's behavior.
  if (scheme.empty() || host.empty()) {
    scheme = source_url_.scheme();
    host = source_url_.host();
  }

  std::string form_string = scheme + "://" + host + "&" +
                            UTF16ToUTF8(form_name_) +
                            form_signature_field_names_;

  return Hash64Bit(form_string);
}

bool FormStructure::IsAutofillable(bool require_method_post) const {
  if (autofill_count() < kRequiredFillableFields)
    return false;

  return ShouldBeParsed(require_method_post);
}

void FormStructure::UpdateAutofillCount() {
  autofill_count_ = 0;
  for (std::vector<AutofillField*>::const_iterator iter = begin();
       iter != end(); ++iter) {
    AutofillField* field = *iter;
    if (field && field->IsFieldFillable())
      ++autofill_count_;
  }
}

bool FormStructure::ShouldBeParsed(bool require_method_post) const {
  if (field_count() < kRequiredFillableFields)
    return false;

  // Rule out http(s)://*/search?...
  //  e.g. http://www.google.com/search?q=...
  //       http://search.yahoo.com/search?p=...
  if (target_url_.path() == "/search")
    return false;

  return !require_method_post || (method_ == POST);
}

void FormStructure::UpdateFromCache(const FormStructure& cached_form) {
  // Map from field signatures to cached fields.
  std::map<std::string, const AutofillField*> cached_fields;
  for (size_t i = 0; i < cached_form.field_count(); ++i) {
    const AutofillField* field = cached_form.field(i);
    cached_fields[field->FieldSignature()] = field;
  }

  for (std::vector<AutofillField*>::const_iterator iter = begin();
       iter != end(); ++iter) {
    AutofillField* field = *iter;
    if (!field)
      continue;

    std::map<std::string, const AutofillField*>::const_iterator
        cached_field = cached_fields.find(field->FieldSignature());
    if (cached_field != cached_fields.end()) {
      field->set_heuristic_type(cached_field->second->heuristic_type());
      field->set_server_type(cached_field->second->server_type());
    }
  }

  UpdateAutofillCount();

  server_experiment_id_ = cached_form.server_experiment_id();
}

void FormStructure::LogQualityMetrics(
    const AutofillMetrics& metric_logger) const {
  std::string experiment_id = server_experiment_id();
  for (size_t i = 0; i < field_count(); ++i) {
    const AutofillField* field = this->field(i);
    metric_logger.Log(AutofillMetrics::FIELD_SUBMITTED, experiment_id);

    // No further logging for empty fields nor for fields where the entered data
    // does not appear to already exist in the user's stored Autofill data.
    const FieldTypeSet& field_types = field->possible_types();
    DCHECK(!field_types.empty());
    if (field_types.count(EMPTY_TYPE) || field_types.count(UNKNOWN_TYPE))
      continue;

    // Collapse field types that Chrome treats as identical, e.g. home and
    // billing address fields.
    FieldTypeSet collapsed_field_types;
    for (FieldTypeSet::const_iterator it = field_types.begin();
         it != field_types.end();
         ++it) {
      // Since we currently only support US phone numbers, the (city code + main
      // digits) number is almost always identical to the whole phone number.
      // TODO(isherman): Improve this logic once we add support for
      // international numbers.
      if (*it == PHONE_HOME_CITY_AND_NUMBER)
        collapsed_field_types.insert(PHONE_HOME_WHOLE_NUMBER);
      else if (*it == PHONE_FAX_CITY_AND_NUMBER)
        collapsed_field_types.insert(PHONE_FAX_WHOLE_NUMBER);
      else
        collapsed_field_types.insert(AutofillType::GetEquivalentFieldType(*it));
    }

    // Capture the field's type, if it is unambiguous.
    AutofillFieldType field_type = UNKNOWN_TYPE;
    if (collapsed_field_types.size() == 1)
      field_type = *collapsed_field_types.begin();

    AutofillFieldType heuristic_type = field->heuristic_type();
    AutofillFieldType server_type = field->server_type();
    AutofillFieldType predicted_type = field->type();

    // Log heuristic, server, and overall type quality metrics, independently of
    // whether the field was autofilled.
    if (heuristic_type == UNKNOWN_TYPE) {
      metric_logger.Log(AutofillMetrics::HEURISTIC_TYPE_UNKNOWN,
                        field_type, experiment_id);
    } else if (field_types.count(heuristic_type)) {
      metric_logger.Log(AutofillMetrics::HEURISTIC_TYPE_MATCH,
                        field_type, experiment_id);
    } else {
      metric_logger.Log(AutofillMetrics::HEURISTIC_TYPE_MISMATCH,
                        field_type, experiment_id);
    }

    if (server_type == NO_SERVER_DATA) {
      metric_logger.Log(AutofillMetrics::SERVER_TYPE_UNKNOWN,
                        field_type, experiment_id);
    } else if (field_types.count(server_type)) {
      metric_logger.Log(AutofillMetrics::SERVER_TYPE_MATCH,
                        field_type, experiment_id);
    } else {
      metric_logger.Log(AutofillMetrics::SERVER_TYPE_MISMATCH,
                        field_type, experiment_id);
    }

    if (predicted_type == UNKNOWN_TYPE) {
      metric_logger.Log(AutofillMetrics::PREDICTED_TYPE_UNKNOWN,
                        field_type, experiment_id);
    } else if (field_types.count(predicted_type)) {
      metric_logger.Log(AutofillMetrics::PREDICTED_TYPE_MATCH,
                        field_type, experiment_id);
    } else {
      metric_logger.Log(AutofillMetrics::PREDICTED_TYPE_MISMATCH,
                        field_type, experiment_id);
    }

    // TODO(isherman): <select> fields don't support |is_autofilled()|, so we
    // have to skip them for the remaining metrics.
    if (field->form_control_type == ASCIIToUTF16("select-one"))
      continue;

    if (field->is_autofilled) {
      metric_logger.Log(AutofillMetrics::FIELD_AUTOFILLED, experiment_id);
    } else {
      metric_logger.Log(AutofillMetrics::FIELD_NOT_AUTOFILLED,
                        experiment_id);

      if (heuristic_type == UNKNOWN_TYPE) {
        metric_logger.Log(
            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_UNKNOWN,
            experiment_id);
      } else if (field_types.count(heuristic_type)) {
        metric_logger.Log(AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MATCH,
                          experiment_id);
      } else {
        metric_logger.Log(
            AutofillMetrics::NOT_AUTOFILLED_HEURISTIC_TYPE_MISMATCH,
            experiment_id);
      }

      if (server_type == NO_SERVER_DATA) {
        metric_logger.Log(AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_UNKNOWN,
                          experiment_id);
      } else if (field_types.count(server_type)) {
        metric_logger.Log(AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MATCH,
                          experiment_id);
      } else {
        metric_logger.Log(AutofillMetrics::NOT_AUTOFILLED_SERVER_TYPE_MISMATCH,
                          experiment_id);
      }
    }
  }
}

void FormStructure::set_possible_types(int index, const FieldTypeSet& types) {
  int num_fields = static_cast<int>(field_count());
  DCHECK(index >= 0 && index < num_fields);
  if (index >= 0 && index < num_fields)
    fields_[index]->set_possible_types(types);
}

const AutofillField* FormStructure::field(int index) const {
  return fields_[index];
}

size_t FormStructure::field_count() const {
  // Don't count the NULL terminator.
  size_t field_size = fields_.size();
  return (field_size == 0) ? 0 : field_size - 1;
}

std::string FormStructure::server_experiment_id() const {
  return server_experiment_id_;
}

bool FormStructure::operator==(const FormData& form) const {
  // TODO(jhawkins): Is this enough to differentiate a form?
  if (form_name_ == form.name &&
      source_url_ == form.origin &&
      target_url_ == form.action) {
    return true;
  }

  // TODO(jhawkins): Compare field names, IDs and labels once we have labels
  // set up.

  return false;
}

bool FormStructure::operator!=(const FormData& form) const {
  return !operator==(form);
}

std::string FormStructure::Hash64Bit(const std::string& str) {
  std::string hash_bin = base::SHA1HashString(str);
  DCHECK_EQ(20U, hash_bin.length());

  uint64 hash64 = (((static_cast<uint64>(hash_bin[0])) & 0xFF) << 56) |
                  (((static_cast<uint64>(hash_bin[1])) & 0xFF) << 48) |
                  (((static_cast<uint64>(hash_bin[2])) & 0xFF) << 40) |
                  (((static_cast<uint64>(hash_bin[3])) & 0xFF) << 32) |
                  (((static_cast<uint64>(hash_bin[4])) & 0xFF) << 24) |
                  (((static_cast<uint64>(hash_bin[5])) & 0xFF) << 16) |
                  (((static_cast<uint64>(hash_bin[6])) & 0xFF) << 8) |
                   ((static_cast<uint64>(hash_bin[7])) & 0xFF);

  return base::Uint64ToString(hash64);
}

void FormStructure::GetHeuristicFieldInfo(FieldTypeMap* field_type_map) {
  FormFieldSet fields(this);

  FormFieldSet::const_iterator field;
  for (field = fields.begin(); field != fields.end(); field++) {
    bool ok = (*field)->GetFieldInfo(field_type_map);
    DCHECK(ok);
  }
}

bool FormStructure::EncodeFormRequest(
    FormStructure::EncodeRequestType request_type,
    buzz::XmlElement* encompassing_xml_element) const {
  if (!field_count())  // Nothing to add.
    return false;

  // Some badly formatted web sites repeat fields - limit number of fields to
  // 48, which is far larger than any valid form and XML still fits into 2K.
  // Do not send requests for forms with more than this many fields, as they are
  // near certainly not valid/auto-fillable.
  const size_t kMaxFieldsOnTheForm = 48;
  if (field_count() > kMaxFieldsOnTheForm)
    return false;

  // Add the child nodes for the form fields.
  for (size_t index = 0; index < field_count(); ++index) {
    const AutofillField* field = fields_[index];
    if (request_type == FormStructure::UPLOAD) {
      FieldTypeSet types = field->possible_types();
      // |types| could be empty in unit-tests only.
      for (FieldTypeSet::iterator field_type = types.begin();
           field_type != types.end(); ++field_type) {
        buzz::XmlElement *field_element = new buzz::XmlElement(
            buzz::QName(kXMLElementField));

        field_element->SetAttr(buzz::QName(kAttributeSignature),
                               field->FieldSignature());
        field_element->SetAttr(buzz::QName(kAttributeAutofillType),
                               base::IntToString(*field_type));
        encompassing_xml_element->AddElement(field_element);
      }
    } else {
      buzz::XmlElement *field_element = new buzz::XmlElement(
          buzz::QName(kXMLElementField));
      field_element->SetAttr(buzz::QName(kAttributeSignature),
                             field->FieldSignature());
      encompassing_xml_element->AddElement(field_element);
    }
  }
  return true;
}

std::string FormStructure::ConvertPresenceBitsToString() const {
  std::vector<uint8> presence_bitfield;
  // Determine all of the field types that were autofilled. Pack bits into
  // |presence_bitfield|. The necessary size for |presence_bitfield| is
  // ceil((MAX_VALID_FIELD_TYPE + 7) / 8) bytes (uint8).
  presence_bitfield.resize((MAX_VALID_FIELD_TYPE + 0x7) / 8);
  for (size_t i = 0; i < presence_bitfield.size(); ++i)
    presence_bitfield[i] = 0;

  for (size_t i = 0; i < field_count(); ++i) {
    const AutofillField* field = fields_[i];
    FieldTypeSet types = field->possible_types();
    // |types| could be empty in unit-tests only.
    for (FieldTypeSet::iterator field_type = types.begin();
         field_type != types.end(); ++field_type) {
      DCHECK(presence_bitfield.size() > (static_cast<size_t>(*field_type) / 8));
      // Set bit in the bitfield: byte |field_type| / 8, bit in byte
      // |field_type| % 8 from the left.
      presence_bitfield[*field_type / 8] |= (0x80 >> (*field_type % 8));
    }
  }

  std::string data_presence;
  data_presence.reserve(presence_bitfield.size() * 2 + 1);

  // Skip trailing zeroes. If all mask is 0 - return empty string.
  size_t data_end = presence_bitfield.size();
  for (; data_end > 0 && !presence_bitfield[data_end - 1]; --data_end) {
  }

  // Print all meaningfull bytes into the string.
  for (size_t i = 0; i < data_end; ++i) {
    base::StringAppendF(&data_presence, "%02x", presence_bitfield[i]);
  }

  return data_presence;
}