// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "app/sql/connection.h" #include "base/file_path.h" #include "base/file_util.h" #include "base/message_loop.h" #include "base/utf_string_conversions.h" #include "chrome/browser/history/text_database_manager.h" #include "chrome/browser/history/visit_database.h" #include "testing/gtest/include/gtest/gtest.h" using base::Time; using base::TimeDelta; using base::TimeTicks; namespace history { namespace { const char* kURL1 = "http://www.google.com/asdf"; const char* kTitle1 = "Google A"; const char* kBody1 = "FOO page one."; const char* kURL2 = "http://www.google.com/qwer"; const char* kTitle2 = "Google B"; const char* kBody2 = "FOO two."; const char* kURL3 = "http://www.google.com/zxcv"; const char* kTitle3 = "Google C"; const char* kBody3 = "FOO drei"; const char* kURL4 = "http://www.google.com/hjkl"; const char* kTitle4 = "Google D"; const char* kBody4 = "FOO lalala four."; const char* kURL5 = "http://www.google.com/uiop"; const char* kTitle5 = "Google cinq"; const char* kBody5 = "FOO page one."; // This provides a simple implementation of a URL+VisitDatabase using an // in-memory sqlite connection. The text database manager expects to be able to // update the visit database to keep in sync. class InMemDB : public URLDatabase, public VisitDatabase { public: InMemDB() { EXPECT_TRUE(db_.OpenInMemory()); CreateURLTable(false); InitVisitTable(); } ~InMemDB() { } private: virtual sql::Connection& GetDB() { return db_; } sql::Connection db_; DISALLOW_COPY_AND_ASSIGN(InMemDB); }; // Adds all the pages once, and the first page once more in the next month. // The times of all the pages will be filled into |*times|. 
void AddAllPages(TextDatabaseManager& manager, VisitDatabase* visit_db, std::vector<Time>* times) { Time::Exploded exploded; memset(&exploded, 0, sizeof(Time::Exploded)); // Put the visits in two different months so it will query across databases. exploded.year = 2008; exploded.month = 1; exploded.day_of_month = 3; VisitRow visit_row; visit_row.url_id = 1; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_row.referring_visit = 0; visit_row.transition = 0; visit_row.segment_id = 0; visit_row.is_indexed = false; VisitID visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle1), UTF8ToUTF16(kBody1)); exploded.day_of_month++; visit_row.url_id = 2; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL2), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle2), UTF8ToUTF16(kBody2)); exploded.day_of_month++; visit_row.url_id = 2; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL3), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle3), UTF8ToUTF16(kBody3)); // Put the next ones in the next month. 
exploded.month++; visit_row.url_id = 2; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL4), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle4), UTF8ToUTF16(kBody4)); exploded.day_of_month++; visit_row.url_id = 2; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL5), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle5), UTF8ToUTF16(kBody5)); // Put the first one in again in the second month. exploded.day_of_month++; visit_row.url_id = 2; visit_row.visit_time = Time::FromUTCExploded(exploded); visit_id = visit_db->AddVisit(&visit_row, SOURCE_BROWSED); times->push_back(visit_row.visit_time); manager.AddPageData(GURL(kURL1), visit_row.url_id, visit_row.visit_id, visit_row.visit_time, UTF8ToUTF16(kTitle1), UTF8ToUTF16(kBody1)); } bool ResultsHaveURL(const std::vector<TextDatabase::Match>& results, const char* url) { GURL gurl(url); for (size_t i = 0; i < results.size(); i++) { if (results[i].url == gurl) return true; } return false; } } // namespace class TextDatabaseManagerTest : public testing::Test { public: // Called manually by the test so it can report failure to initialize. bool Init() { return file_util::CreateNewTempDirectory( FILE_PATH_LITERAL("TestSearchTest"), &dir_); } protected: void SetUp() { } void TearDown() { file_util::Delete(dir_, true); } MessageLoop message_loop_; // Directory containing the databases. FilePath dir_; }; // Tests basic querying. 
TEST_F(TextDatabaseManagerTest, InsertQuery) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  std::vector<Time> times;
  AddAllPages(manager, &visit_db, &times);

  // Search a window that brackets every visit added above with a wide margin
  // on both sides.
  QueryOptions options;
  options.begin_time = times[0] - TimeDelta::FromDays(100);
  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;
  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                         &results, &first_time_searched);

  // We should have matched every page (five distinct pages, with the first
  // one added twice, gives six entries).
  EXPECT_EQ(6U, results.size());
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
  EXPECT_TRUE(ResultsHaveURL(results, kURL4));
  EXPECT_TRUE(ResultsHaveURL(results, kURL5));

  // The first time searched should have been the first page's time or before
  // (it could have eliminated some time for us).
  EXPECT_TRUE(first_time_searched <= times[0]);
}

// Tests that adding page components piecemeal will get them added properly.
// This does not supply a visit to update, this mode is used only by the unit
// tests right now, but we test it anyway.
TEST_F(TextDatabaseManagerTest, InsertCompleteNoVisit) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  // First add one without a visit (URL ID and visit ID are both 0), supplying
  // the URL, title and body in three separate calls.
  const GURL url(kURL1);
  manager.AddPageURL(url, 0, 0, Time::Now());
  manager.AddPageTitle(url, UTF8ToUTF16(kTitle1));
  manager.AddPageContents(url, UTF8ToUTF16(kBody1));

  // Check that the page got added.
  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;

  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                         &results, &first_time_searched);
  // ASSERT rather than EXPECT: results[0] is dereferenced on the next line.
  ASSERT_EQ(1U, results.size());
  EXPECT_EQ(kTitle1, UTF16ToUTF8(results[0].title));
}

// Like InsertCompleteNoVisit but specifies a visit to update. We check that the
// visit was updated properly.
TEST_F(TextDatabaseManagerTest, InsertCompleteVisit) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  // First add a visit to a page. We can just make up a URL ID since there is
  // not actually any URL database around.
  VisitRow visit;
  visit.url_id = 1;
  visit.visit_time = Time::Now();
  visit.referring_visit = 0;
  visit.transition = PageTransition::LINK;
  visit.segment_id = 0;
  visit.is_indexed = false;
  visit_db.AddVisit(&visit, SOURCE_BROWSED);

  // Add a full text indexed entry for that visit. The body is supplied before
  // the title here, the reverse of InsertCompleteNoVisit.
  const GURL url(kURL2);
  manager.AddPageURL(url, visit.url_id, visit.visit_id, visit.visit_time);
  manager.AddPageContents(url, UTF8ToUTF16(kBody2));
  manager.AddPageTitle(url, UTF8ToUTF16(kTitle2));

  // Check that the page got added.
  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;

  manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                         &results, &first_time_searched);
  // ASSERT rather than EXPECT: results[0] is dereferenced on the next line.
  ASSERT_EQ(1U, results.size());
  EXPECT_EQ(kTitle2, UTF16ToUTF8(results[0].title));

  // Check that the visit got updated for its new indexed state.
  VisitRow out_visit;
  ASSERT_TRUE(visit_db.GetRowForVisit(visit.visit_id, &out_visit));
  EXPECT_TRUE(out_visit.is_indexed);
}

// Tests that partial inserts that expire are added to the database.
TEST_F(TextDatabaseManagerTest, InsertPartial) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  // Add the first one with just a URL.
  GURL url1(kURL1);
  manager.AddPageURL(url1, 0, 0, Time::Now());

  // Now add a second one with a URL and title.
  GURL url2(kURL2);
  manager.AddPageURL(url2, 0, 0, Time::Now());
  manager.AddPageTitle(url2, UTF8ToUTF16(kTitle2));

  // The third one has a URL and body.
  GURL url3(kURL3);
  manager.AddPageURL(url3, 0, 0, Time::Now());
  manager.AddPageContents(url3, UTF8ToUTF16(kBody3));

  // Expire stuff very fast. This assumes that the time between the first
  // AddPageURL and this line is less than the expiration time (20 seconds).
  TimeTicks added_time = TimeTicks::Now();
  TimeTicks expire_time = added_time + TimeDelta::FromSeconds(5);
  manager.FlushOldChangesForTime(expire_time);

  // Do a query, nothing should be added yet.
  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;
  manager.GetTextMatches(UTF8ToUTF16("google"), options,
                         &results, &first_time_searched);
  ASSERT_EQ(0U, results.size());

  // Compute a time threshold that will cause everything to be flushed, and
  // poke at the manager's internals to cause this to happen.
  expire_time = added_time + TimeDelta::FromDays(1);
  manager.FlushOldChangesForTime(expire_time);

  // Now we should have all 3 URLs added.
  manager.GetTextMatches(UTF8ToUTF16("google"), options,
                         &results, &first_time_searched);
  ASSERT_EQ(3U, results.size());
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
}

// Tests that partial inserts (due to timeouts) will still get updated if the
// data comes in later.
TEST_F(TextDatabaseManagerTest, PartialComplete) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  Time added_time = Time::Now();
  GURL url(kURL1);

  // We have to have the URL in the URL and visit databases for this test to
  // work.
  URLRow url_row(url);
  url_row.set_title(UTF8ToUTF16("chocolate"));
  URLID url_id = visit_db.AddURL(url_row);
  ASSERT_TRUE(url_id);
  VisitRow visit_row;
  visit_row.url_id = url_id;
  visit_row.visit_time = added_time;
  visit_db.AddVisit(&visit_row, SOURCE_BROWSED);

  // Add a URL with no title or body, and say that it expired.
  manager.AddPageURL(url, 0, 0, added_time);
  TimeTicks expire_time = TimeTicks::Now() + TimeDelta::FromDays(1);
  manager.FlushOldChangesForTime(expire_time);

  // Add the title. We should be able to query based on that. The title in the
  // URL row we set above should not come into the picture.
  manager.AddPageTitle(url, UTF8ToUTF16("Some unique title"));
  Time first_time_searched;
  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  manager.GetTextMatches(UTF8ToUTF16("unique"), options,
                         &results, &first_time_searched);
  EXPECT_EQ(1U, results.size());
  manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
                         &results, &first_time_searched);
  EXPECT_EQ(0U, results.size());

  // Now add the body, which should be queryable.
  manager.AddPageContents(url, UTF8ToUTF16("Very awesome body"));
  manager.GetTextMatches(UTF8ToUTF16("awesome"), options,
                         &results, &first_time_searched);
  EXPECT_EQ(1U, results.size());

  // Adding the body will actually copy the title from the URL table rather
  // than the previously indexed row (we made them not match above). This isn't
  // necessarily what we want, but it's how it's implemented, and we don't want
  // to regress it.
  manager.GetTextMatches(UTF8ToUTF16("chocolate"), options,
                         &results, &first_time_searched);
  EXPECT_EQ(1U, results.size());
}

// Tests that changes get properly committed to disk.
TEST_F(TextDatabaseManagerTest, Writing) {
  ASSERT_TRUE(Init());

  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;

  InMemDB visit_db;

  // Create the manager and write some stuff to it. The manager lives in its
  // own scope so it is destroyed before the second manager is created.
  {
    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    ASSERT_TRUE(manager.Init(NULL));

    std::vector<Time> times;
    AddAllPages(manager, &visit_db, &times);

    // We should have matched every page.
    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                           &results, &first_time_searched);
    EXPECT_EQ(6U, results.size());
  }
  results.clear();

  // Recreate the manager and make sure it finds the written stuff.
  {
    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    ASSERT_TRUE(manager.Init(NULL));

    // We should have matched every page again.
    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                           &results, &first_time_searched);
    EXPECT_EQ(6U, results.size());
  }
}

// Tests that changes get properly committed to disk, as in the Writing test
// above, but when there is a transaction around the adds.
TEST_F(TextDatabaseManagerTest, WritingTransaction) {
  ASSERT_TRUE(Init());

  QueryOptions options;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;

  InMemDB visit_db;

  // Create the manager and write some stuff to it.
  {
    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    ASSERT_TRUE(manager.Init(NULL));

    std::vector<Time> times;
    manager.BeginTransaction();
    AddAllPages(manager, &visit_db, &times);
    // "Forget" to commit, it should be autocommitted for us.

    // We should have matched every page.
    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                           &results, &first_time_searched);
    EXPECT_EQ(6U, results.size());
  }
  results.clear();

  // Recreate the manager and make sure it finds the written stuff.
  {
    TextDatabaseManager manager(dir_, &visit_db, &visit_db);
    ASSERT_TRUE(manager.Init(NULL));

    // We should have matched every page again.
    manager.GetTextMatches(UTF8ToUTF16("FOO"), options,
                           &results, &first_time_searched);
    EXPECT_EQ(6U, results.size());
  }
}

// Tests querying where the maximum number of items is met.
TEST_F(TextDatabaseManagerTest, QueryMax) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  std::vector<Time> times;
  AddAllPages(manager, &visit_db, &times);

  string16 foo = UTF8ToUTF16("FOO");

  QueryOptions options;
  options.begin_time = times[0] - TimeDelta::FromDays(100);
  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
  options.max_count = 2;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);

  // We should have gotten the last two pages as results (the first page is
  // also the last).
  EXPECT_EQ(2U, results.size());
  EXPECT_TRUE(first_time_searched <= times[4]);
  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));

  // Asking for 4 pages, the first one should be in another DB.
  options.max_count = 4;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);

  EXPECT_EQ(4U, results.size());
  EXPECT_TRUE(first_time_searched <= times[4]);
  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
  EXPECT_TRUE(ResultsHaveURL(results, kURL4));
  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));
}

// Tests querying backwards in time in chunks.
TEST_F(TextDatabaseManagerTest, QueryBackwards) {
  ASSERT_TRUE(Init());
  InMemDB visit_db;
  TextDatabaseManager manager(dir_, &visit_db, &visit_db);
  ASSERT_TRUE(manager.Init(NULL));

  std::vector<Time> times;
  AddAllPages(manager, &visit_db, &times);

  string16 foo = UTF8ToUTF16("FOO");

  // First do a query for all time, but with a max of 2. This will give us the
  // last two results and will tell us where to start searching when we want
  // to go back in time.
  QueryOptions options;
  options.begin_time = times[0] - TimeDelta::FromDays(100);
  options.end_time = times[times.size() - 1] + TimeDelta::FromDays(100);
  options.max_count = 2;
  std::vector<TextDatabase::Match> results;
  Time first_time_searched;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);

  // Check that we got the last two results.
  EXPECT_EQ(2U, results.size());
  EXPECT_TRUE(first_time_searched <= times[4]);
  EXPECT_TRUE(ResultsHaveURL(results, kURL5));
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));

  // Query the previous two URLs and make sure we got the correct ones.
  options.end_time = first_time_searched;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);
  EXPECT_EQ(2U, results.size());
  EXPECT_TRUE(first_time_searched <= times[2]);
  EXPECT_TRUE(ResultsHaveURL(results, kURL3));
  EXPECT_TRUE(ResultsHaveURL(results, kURL4));

  // Query the previous two URLs...
  options.end_time = first_time_searched;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);
  EXPECT_EQ(2U, results.size());
  EXPECT_TRUE(first_time_searched <= times[0]);
  EXPECT_TRUE(ResultsHaveURL(results, kURL2));
  EXPECT_TRUE(ResultsHaveURL(results, kURL1));

  // Try to query some more, there should be no results.
  options.end_time = first_time_searched;
  manager.GetTextMatches(foo, options, &results, &first_time_searched);
  EXPECT_EQ(0U, results.size());
}

}  // namespace history