diff options
author | Bharat Mediratta <bharat@menalto.com> | 2009-01-17 00:52:50 +0000 |
---|---|---|
committer | Bharat Mediratta <bharat@menalto.com> | 2009-01-17 00:52:50 +0000 |
commit | d568a1e9fd63c97eb86cd84a51bb6770747ec37c (patch) | |
tree | 2b382bce985bce886c69d6d58377c5ce71a0cdc6 | |
parent | 91be7a38f883cd008d370f81ff68949de68eafbf (diff) |
Implement relevance-ranked boolean searching on a full-text index of
item and comment data. Whew!
It's not pretty yet, and for now you have to update the index manually
in admin/maintenance. But it works.
-rw-r--r-- | core/helpers/core_search.php | 24 | ||||
-rw-r--r-- | modules/comment/helpers/comment_installer.php | 3 | ||||
-rw-r--r-- | modules/comment/helpers/comment_search.php | 34 | ||||
-rw-r--r-- | modules/search/controllers/search.php | 43 | ||||
-rw-r--r-- | modules/search/helpers/search.php | 77 | ||||
-rw-r--r-- | modules/search/helpers/search_event.php | 49 | ||||
-rw-r--r-- | modules/search/helpers/search_installer.php | 17 | ||||
-rw-r--r-- | modules/search/helpers/search_task.php | 61 | ||||
-rw-r--r-- | modules/search/models/search_record.php | 24 | ||||
-rw-r--r-- | modules/search/views/search.html.php | 35 | ||||
-rw-r--r-- | modules/search/views/search_link.html.php | 4 |
11 files changed, 369 insertions, 2 deletions
diff --git a/core/helpers/core_search.php b/core/helpers/core_search.php new file mode 100644 index 00000000..be11ab81 --- /dev/null +++ b/core/helpers/core_search.php @@ -0,0 +1,24 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +class Core_Search_Core { + static function item_index_data($item) { + return join(" ", array($item->description, $item->name, $item->title)); + } +} diff --git a/modules/comment/helpers/comment_installer.php b/modules/comment/helpers/comment_installer.php index aba32566..651065f7 100644 --- a/modules/comment/helpers/comment_installer.php +++ b/modules/comment/helpers/comment_installer.php @@ -58,6 +58,9 @@ class comment_installer { static function uninstall() { $db = Database::instance(); + $sql = "SELECT `item_id` FROM `comments`"; + module::event("item_related_update_batch", $sql); + $db->query("DROP TABLE IF EXISTS `comments`;"); module::delete("comment"); } diff --git a/modules/comment/helpers/comment_search.php b/modules/comment/helpers/comment_search.php new file mode 100644 index 00000000..79a0c4bc --- /dev/null +++ b/modules/comment/helpers/comment_search.php @@ -0,0 +1,34 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +class Comment_Search_Core { + static function item_index_data($item) { + $data = array(); + foreach (Database::instance() + ->select("text") + ->from("comments") + ->where("item_id", $item->id) + ->get() + ->as_array() as $row) { + $data[] = $row->text; + } + return join(" ", $data); + } +} diff --git a/modules/search/controllers/search.php b/modules/search/controllers/search.php new file mode 100644 index 00000000..f7313259 --- /dev/null +++ b/modules/search/controllers/search.php @@ -0,0 +1,43 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +class Search_Controller extends Controller { + public function index() { + $page_size = module::get_var("core", "page_size", 9); + $q = $this->input->get("q"); + $page = $this->input->get("page", 1); + $offset = ($page - 1) * $page_size; + + // Make sure that the page references a valid offset + if ($page < 1) { + $page = 1; + } + + list ($count, $result) = search::search($q, $page_size, $offset); + $template = new Theme_View("page.html", "search"); + $template->set_global("page_size", $page_size); + $template->set_global("children_count", $count); + + $template->content = new View("search.html"); + $template->content->items = $result; + $template->content->q = $q; + + print $template; + } +} diff --git a/modules/search/helpers/search.php b/modules/search/helpers/search.php new file mode 100644 index 00000000..02c5f184 --- /dev/null +++ b/modules/search/helpers/search.php @@ -0,0 +1,77 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +class search_Core { + static function search($q, $limit, $offset) { + $db = Database::instance(); + $q = $db->escape_str($q); + + if (!user::active()->admin) { + foreach (user::group_ids() as $id) { + $fields[] = "`view_$id` = " . access::ALLOW; + } + $accesS_sql = "AND (" . join(" AND ", $fields) . ")"; + } else { + $access_sql = ""; + } + + // Count the total number of rows. We can't do this with our regular query because of the + // limit statement. It's possible that if we get rid of the limit (but keep the offset) on + // the 2nd query and combine the two, it might be faster than making 2 separate queries. + $count_query = "SELECT COUNT(*) AS C " . + "FROM `items` JOIN `search_records` ON (`items`.`id` = `search_records`.`item_id`) " . + "WHERE MATCH(`search_records`.`data`) AGAINST ('$q' IN BOOLEAN MODE) " . + $access_sql; + $count = $db->query($count_query)->current()->C; + + $query = "SELECT `items`.*, MATCH(`search_records`.`data`) AGAINST ('$q') AS `score` " . + "FROM `items` JOIN `search_records` ON (`items`.`id` = `search_records`.`item_id`) " . + "WHERE MATCH(`search_records`.`data`) AGAINST ('$q' IN BOOLEAN MODE) " . + $access_sql . + "ORDER BY `score` DESC " . + "LIMIT $offset, $limit"; + + return array($count, new ORM_Iterator(ORM::factory("item"), $db->query($query))); + } + + static function check_index() { + if ($count = ORM::factory("search_record")->where("dirty", 1)->count_all()) { + site_status::warning( + t("Your search index needs to be updated. %link_startFix this now%link_end", + array("link_start" => "<a href=\"" . + url::site("admin/maintenance/start/search_task::rebuild_index?csrf=__CSRF__") . 
+ "\" class=\"gDialogLink\">", + "link_end" => "</a>")), + "search_index_out_of_date"); + } + } + + static function update_record($record) { + $data = array(); + foreach (module::installed() as $module_name => $module_info) { + $class_name = "{$module_name}_search"; + if (method_exists($class_name, "item_index_data")) { + $data[] = call_user_func(array($class_name, "item_index_data"), $record->item()); + } + } + $record->data = join(" ", $data); + $record->dirty = 0; + $record->save(); + } +} diff --git a/modules/search/helpers/search_event.php b/modules/search/helpers/search_event.php new file mode 100644 index 00000000..a29d301b --- /dev/null +++ b/modules/search/helpers/search_event.php @@ -0,0 +1,49 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +class search_event_Core { + static function item_created($item) { + ORM::factory("search_record")->item_id($item->id)->save(); + } + + static function item_updated($old_item, $new_item) { + Database::instance() + ->from("search_records") + ->set("dirty", 1) + ->where("item_id", $new_item->id) + ->update(); + } + + static function item_before_delete($item) { + ORM::factory("item_id", $item->id)->delete_all(); + } + + static function item_related_update($item) { + Database::instance() + ->from("search_records") + ->set("dirty", 1) + ->where("item_id", $item->id) + ->update(); + } + + static function item_related_update_batch($sql) { + $db = Database::instance(); + $db->query("UPDATE `search_records` SET `dirty` = 1 WHERE item_id IN ($sql)"); + } +} diff --git a/modules/search/helpers/search_installer.php b/modules/search/helpers/search_installer.php index 7dc4c39a..ae8eacd3 100644 --- a/modules/search/helpers/search_installer.php +++ b/modules/search/helpers/search_installer.php @@ -20,12 +20,29 @@ class search_installer { static function install() { $version = module::get_version("search"); + $db = Database::instance(); if ($version == 0) { + $db->query("CREATE TABLE `search_records` ( + `id` int(9) NOT NULL auto_increment, + `item_id` int(9), + `dirty` boolean default 1, + `data` LONGTEXT default NULL, + PRIMARY KEY (`id`), + FULLTEXT INDEX (`data`)) + ENGINE=MyISAM DEFAULT CHARSET=utf8;"); + + // populate the index with dirty records + $db->query("insert into `search_records` (`item_id`) SELECT `id` FROM `items`"); module::set_version("search", 1); + + search::check_index(); } } static function uninstall() { + $db = Database::instance(); + $db->query("DROP TABLE `search_records`"); + site_status::clear("search_index_out_of_date"); module::delete("search"); } } diff --git a/modules/search/helpers/search_task.php b/modules/search/helpers/search_task.php new file mode 100644 index 00000000..5291a27b --- /dev/null +++ b/modules/search/helpers/search_task.php @@ 
-0,0 +1,61 @@ + +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ +class search_task_Core { + static function available_tasks() { + list ($remaining, $total, $percent) = self::_get_stats(); + return array(Task_Definition::factory() + ->callback("search_task::update_index") + ->name(t("Update Search Index")) + ->description($remaining ? + t("Search index is %percent% up-to-date", + array("percent" => $percent)) + : t("Search index is up to date")) + ->severity($remaining ? 
log::WARNING : log::SUCCESS)); + } + + static function update_index($task) { + $completed = $task->get("completed", 0); + foreach (ORM::factory("search_record")->where("dirty", 1)->limit(2)->find_all() as $record) { + search::update_record($record); + $completed++; + } + $task->set("completed", $completed); + + list ($remaining, $total, $percent) = self::_get_stats(); + $task->percent_complete = round(100 * $completed / ($remaining + $completed)); + + $task->status = t("%done records records updated, index is %percent% up-to-date", + array("done" => $completed, "percent" => $percent)); + + if ($remaining == 0) { + $task->done = true; + $task->state = "success"; + site_status::clear("search_index_out_of_date"); + } + } + + private static function _get_stats() { + $remaining = ORM::factory("search_record")->where("dirty", 1)->count_all(); + $total = ORM::factory("search_record")->count_all(); + $percent = round(100 * ($total - $remaining) / $total); + return array($remaining, $total, $percent); + } +} diff --git a/modules/search/models/search_record.php b/modules/search/models/search_record.php new file mode 100644 index 00000000..102641ae --- /dev/null +++ b/modules/search/models/search_record.php @@ -0,0 +1,24 @@ +<?php defined("SYSPATH") or die("No direct script access."); +/** + * Gallery - a web based photo album viewer and editor + * Copyright (C) 2000-2008 Bharat Mediratta + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ +class Search_Record_Model extends ORM { + function item() { + return model_cache::get("item", $this->item_id); + } +} diff --git a/modules/search/views/search.html.php b/modules/search/views/search.html.php new file mode 100644 index 00000000..86a29db5 --- /dev/null +++ b/modules/search/views/search.html.php @@ -0,0 +1,35 @@ +<?php defined("SYSPATH") or die("No direct script access.") ?> +<div id="gSearch"> + <form action="<?= url::site("/search") ?>"> + <fieldset> + <legend> + <?= t("Search") ?> + </legend> + <ul> + <li> + <input name="q" type="text" value="<?= $q ?>"/> + </li> + <li> + <input type="submit"/> + </li> + </ul> + </fieldset> + </form> + + <ul> + <? foreach ($items as $item): ?> + <li> + <a href="<?= url::site("items/$item->id") ?>"> + <?= $item->thumb_tag() ?> + <p> + <?= $item->title ?> + </p> + <p> + <?= $item->description ?> + </p> + </a> + </li> + <? endforeach ?> + </ul> +</div> +<?= $theme->pager() ?> diff --git a/modules/search/views/search_link.html.php b/modules/search/views/search_link.html.php index d5a5c47b..7be2b101 100644 --- a/modules/search/views/search_link.html.php +++ b/modules/search/views/search_link.html.php @@ -1,9 +1,9 @@ <?php defined("SYSPATH") or die("No direct script access.") ?> -<form id="gSearchForm"> +<form action="<?= url::site("search") ?>" id="gSearchForm"> <ul> <li> <label for="gSearch"><?= t("Search the gallery") ?></label> - <input type="text" name="search" id="gSearch"/> + <input type="text" name="q" id="gSearch"/> </li> <li> <input type="submit" value="<?= t("Go") ?>" /> |