From d568a1e9fd63c97eb86cd84a51bb6770747ec37c Mon Sep 17 00:00:00 2001
From: Bharat Mediratta
Date: Sat, 17 Jan 2009 00:52:50 +0000
Subject: Implement relevance ranked boolean searching on a full text index of item and comment data.

Whew! It's not pretty yet. And you have to manually update the index currently in admin/maintenance. But it works.
---
 modules/comment/helpers/comment_installer.php | 3 ++
 modules/comment/helpers/comment_search.php | 34 ++++++++++++
 modules/search/controllers/search.php | 43 +++++++++++++++
 modules/search/helpers/search.php | 77 +++++++++++++++++++++++++++
 modules/search/helpers/search_event.php | 49 +++++++++++++++++
 modules/search/helpers/search_installer.php | 17 ++++++
 modules/search/helpers/search_task.php | 61 +++++++++++++++++++++
 modules/search/models/search_record.php | 24 +++++++++
 modules/search/views/search.html.php | 35 ++++++++++++
 modules/search/views/search_link.html.php | 4 +-
 10 files changed, 345 insertions(+), 2 deletions(-)
 create mode 100644 modules/comment/helpers/comment_search.php
 create mode 100644 modules/search/controllers/search.php
 create mode 100644 modules/search/helpers/search.php
 create mode 100644 modules/search/helpers/search_event.php
 create mode 100644 modules/search/helpers/search_task.php
 create mode 100644 modules/search/models/search_record.php
 create mode 100644 modules/search/views/search.html.php
(limited to 'modules')

diff --git a/modules/comment/helpers/comment_installer.php b/modules/comment/helpers/comment_installer.php
index aba32566..651065f7 100644
--- a/modules/comment/helpers/comment_installer.php
+++ b/modules/comment/helpers/comment_installer.php
@@ -58,6 +58,9 @@ class comment_installer {
   static function uninstall() {
     $db = Database::instance();
+    $sql = "SELECT `item_id` FROM `comments`";
+    module::event("item_related_update_batch", $sql);
+
     $db->query("DROP TABLE IF EXISTS `comments`;");
     module::delete("comment");
   }
diff --git a/modules/comment/helpers/comment_search.php b/modules/comment/helpers/comment_search.php
new file mode 100644
index 00000000..79a0c4bc
--- /dev/null
+++ b/modules/comment/helpers/comment_search.php
@@ -0,0 +1,34 @@
+select("text")
+      ->from("comments")
+      ->where("item_id", $item->id)
+      ->get()
+      ->as_array() as $row) {
+      $data[] = $row->text;
+    }
+    return join(" ", $data);
+  }
+}
diff --git a/modules/search/controllers/search.php b/modules/search/controllers/search.php
new file mode 100644
index 00000000..f7313259
--- /dev/null
+++ b/modules/search/controllers/search.php
@@ -0,0 +1,43 @@
+input->get("q");
+    // The page number comes straight from the query string; force it to an integer
+    // because the offset derived from it is handed to search::search().
+    $page = (int)$this->input->get("page", 1);
+
+    // Make sure that the page references a valid offset
+    if ($page < 1) {
+      $page = 1;
+    }
+
+    // Compute the offset only after clamping the page so it can never be negative.
+    $offset = ($page - 1) * $page_size;
+
+    list ($count, $result) = search::search($q, $page_size, $offset);
+    $template = new Theme_View("page.html", "search");
+    $template->set_global("page_size", $page_size);
+    $template->set_global("children_count", $count);
+
+    $template->content = new View("search.html");
+    $template->content->items = $result;
+    $template->content->q = $q;
+
+    print $template;
+  }
+}
diff --git a/modules/search/helpers/search.php b/modules/search/helpers/search.php
new file mode 100644
index 00000000..02c5f184
--- /dev/null
+++ b/modules/search/helpers/search.php
@@ -0,0 +1,77 @@
+escape_str($q);
+
+    // Both values are interpolated into the LIMIT clause below; force them to integers.
+    $limit = (int)$limit;
+    $offset = max(0, (int)$offset);
+
+    if (!user::active()->admin) {
+      // Require the view permission column of every group id of the active user.
+      $fields = array();
+      foreach (user::group_ids() as $id) {
+        $fields[] = "`view_$id` = " . access::ALLOW;
+      }
+      // Without any group there is nothing that grants access: match no rows instead of
+      // emitting an empty "AND ()" clause.
+      $access_sql = $fields ? "AND (" . join(" AND ", $fields) . ") " : "AND 0 ";
+    } else {
+      $access_sql = "";
+    }
+
+    // Count the total number of rows. We can't do this with our regular query because of the
+    // limit statement. It's possible that if we get rid of the limit (but keep the offset) on
+    // the 2nd query and combine the two, it might be faster than making 2 separate queries.
+    $count_query = "SELECT COUNT(*) AS C " .
+      "FROM `items` JOIN `search_records` ON (`items`.`id` = `search_records`.`item_id`) " .
+      "WHERE MATCH(`search_records`.`data`) AGAINST ('$q' IN BOOLEAN MODE) " .
+      $access_sql;
+    $count = $db->query($count_query)->current()->C;
+
+    // The boolean-mode MATCH filters the rows; the plain MATCH in the select list supplies
+    // the score the results are ordered by.
+    $query = "SELECT `items`.*, MATCH(`search_records`.`data`) AGAINST ('$q') AS `score` " .
+      "FROM `items` JOIN `search_records` ON (`items`.`id` = `search_records`.`item_id`) " .
+      "WHERE MATCH(`search_records`.`data`) AGAINST ('$q' IN BOOLEAN MODE) " .
+      $access_sql .
+      "ORDER BY `score` DESC " .
+      "LIMIT $offset, $limit";
+
+    return array($count, new ORM_Iterator(ORM::factory("item"), $db->query($query)));
+  }
+
+  /**
+   * Post a site status warning when at least one search record is flagged dirty.
+   */
+  static function check_index() {
+    if (ORM::factory("search_record")->where("dirty", 1)->count_all()) {
+      // NOTE(review): both link placeholders are empty strings here, so the message renders
+      // without a link -- the markup looks like it was lost; confirm against the repository.
+      site_status::warning(
+        t("Your search index needs to be updated. %link_startFix this now%link_end",
+          array("link_start" => "",
+                "link_end" => "")),
+        "search_index_out_of_date");
+    }
+  }
+
+  /**
+   * Rebuild the indexed text of one search record and mark it clean.
+   *
+   * Every installed module that provides a "<module>_search::item_index_data" method is asked
+   * for text about the record's item; the pieces are joined with spaces and saved.
+   *
+   * @param object $record the search record to refresh; must provide item() and save()
+   */
+  static function update_record($record) {
+    $data = array();
+    foreach (module::installed() as $module_name => $module_info) {
+      $class_name = "{$module_name}_search";
+      if (method_exists($class_name, "item_index_data")) {
+        $data[] = call_user_func(array($class_name, "item_index_data"), $record->item());
+      }
+    }
+    $record->data = join(" ", $data);
+    $record->dirty = 0;
+    $record->save();
+  }
+}
diff --git a/modules/search/helpers/search_event.php b/modules/search/helpers/search_event.php
new file mode 100644
index 00000000..a29d301b
--- /dev/null
+++ b/modules/search/helpers/search_event.php
@@ -0,0 +1,49 @@
+item_id($item->id)->save();
+  }
+
+  /**
+   * Flag the search record of an updated item as dirty.
+   */
+  static function item_updated($old_item, $new_item) {
+    Database::instance()
+      ->from("search_records")
+      ->set("dirty", 1)
+      ->where("item_id", $new_item->id)
+      ->update();
+  }
+
+  /**
+   * Remove the search records that belong to an item that is about to be deleted.
+   */
+  static function item_before_delete($item) {
+    // Select the rows by their item_id column; "item_id" is a column of search_records,
+    // not the name of a model.
+    ORM::factory("search_record")->where("item_id", $item->id)->delete_all();
+  }
+
+  /**
+   * Flag the search record of an item as dirty after data related to it changed.
+   */
+  static function item_related_update($item) {
+    Database::instance()
+      ->from("search_records")
+      ->set("dirty", 1)
+      ->where("item_id", $item->id)
+      ->update();
+  }
+
+  /**
+   * Flag the search records of many items as dirty in one statement.
+   *
+   * @param string $sql a SELECT statement yielding item ids; it is embedded verbatim as a
+   *                    subquery, so it must come from trusted code
+   */
+  static function item_related_update_batch($sql) {
+    $db = Database::instance();
+    $db->query("UPDATE `search_records` SET `dirty` = 1 WHERE item_id IN ($sql)");
+  }
+}
diff --git a/modules/search/helpers/search_installer.php b/modules/search/helpers/search_installer.php
index 7dc4c39a..ae8eacd3 100644
--- a/modules/search/helpers/search_installer.php
+++ b/modules/search/helpers/search_installer.php
@@ -20,12 +20,29 @@
 class search_installer {
   static function install() {
     $version = module::get_version("search");
+    $db = Database::instance();
     if ($version == 0) {
+      $db->query("CREATE TABLE `search_records` (
+                   `id` int(9) NOT NULL auto_increment,
+                   `item_id` int(9),
+                   `dirty` boolean default 1,
+                   `data` LONGTEXT default NULL,
+                   PRIMARY KEY (`id`),
+                   FULLTEXT INDEX (`data`))
+                 ENGINE=MyISAM DEFAULT CHARSET=utf8;");
+
+      // populate the index with dirty records
+      $db->query("insert into `search_records` (`item_id`) SELECT `id` FROM `items`");
       module::set_version("search", 1);
+
+      search::check_index();
     }
   }
 
   static function uninstall() {
+    $db = Database::instance();
+    $db->query("DROP TABLE IF EXISTS `search_records`");
+    site_status::clear("search_index_out_of_date");
     module::delete("search");
   }
 }
diff --git a/modules/search/helpers/search_task.php b/modules/search/helpers/search_task.php
new file mode 100644
index 00000000..5291a27b
--- /dev/null
+++ b/modules/search/helpers/search_task.php
@@ -0,0 +1,61 @@
+
+callback("search_task::update_index")
+                   ->name(t("Update Search Index"))
+                   ->description($remaining ?
+                                 t("Search index is %percent% up-to-date",
+                                   array("percent" => $percent))
+                                 : t("Search index is up to date"))
+                   ->severity($remaining ? log::WARNING : log::SUCCESS));
+  }
+
+  /**
+   * Run one step of the index update: refresh up to two dirty records, then report progress.
+   *
+   * The running total is kept in the task under "completed". The task is marked done and the
+   * "search_index_out_of_date" site status is cleared once no dirty record remains.
+   *
+   * @param object $task the task being run; its get()/set() store the progress counter
+   */
+  static function update_index($task) {
+    $completed = $task->get("completed", 0);
+    foreach (ORM::factory("search_record")->where("dirty", 1)->limit(2)->find_all() as $record) {
+      search::update_record($record);
+      $completed++;
+    }
+    $task->set("completed", $completed);
+
+    list ($remaining, $total, $percent) = self::_get_stats();
+    // When nothing was dirty to begin with both counters are zero; report 100% instead of
+    // dividing by zero.
+    $work = $remaining + $completed;
+    $task->percent_complete = $work ? round(100 * $completed / $work) : 100;
+
+    $task->status = t("%done records updated, index is %percent% up-to-date",
+                      array("done" => $completed, "percent" => $percent));
+
+    if ($remaining == 0) {
+      $task->done = true;
+      $task->state = "success";
+      site_status::clear("search_index_out_of_date");
+    }
+  }
+
+  /**
+   * Gather index statistics.
+   *
+   * @return array (dirty record count, total record count, rounded percentage of clean records)
+   */
+  private static function _get_stats() {
+    $remaining = ORM::factory("search_record")->where("dirty", 1)->count_all();
+    $total = ORM::factory("search_record")->count_all();
+    // An empty table has nothing left to index, so treat it as fully up to date.
+    $percent = $total ? round(100 * ($total - $remaining) / $total) : 100;
+    return array($remaining, $total, $percent);
+  }
+}
diff --git a/modules/search/models/search_record.php b/modules/search/models/search_record.php
new file mode 100644
index 00000000..102641ae
--- /dev/null
+++ b/modules/search/models/search_record.php
@@ -0,0 +1,24 @@
+item_id);
+  }
+}
diff --git a/modules/search/views/search.html.php b/modules/search/views/search.html.php
new file mode 100644
index 00000000..86a29db5
--- /dev/null
+++ b/modules/search/views/search.html.php
@@ -0,0 +1,35 @@
+
+
"> +
+ + + +
    +
  • + +
  • +
  • + +
  • +
+
+
+ + +
+pager() ?> diff --git a/modules/search/views/search_link.html.php b/modules/search/views/search_link.html.php index d5a5c47b..7be2b101 100644 --- a/modules/search/views/search_link.html.php +++ b/modules/search/views/search_link.html.php @@ -1,9 +1,9 @@ -
+" id="gSearchForm">