diff options
author | Bharat Mediratta <bharat@menalto.com> | 2008-10-31 22:12:14 +0000 |
---|---|---|
committer | Bharat Mediratta <bharat@menalto.com> | 2008-10-31 22:12:14 +0000 |
commit | eba717f95f586d2538007bd18da6e9b32b076c30 (patch) | |
tree | 15fc596a270f9de0d163c66c96e3c65fca5ee100 /kohana/core | |
parent | fff10f8b70376ef25722bd867df26bc5aefced43 (diff) |
Merge over vendor code.
git-svn-id: http://gallery.svn.sourceforge.net/svnroot/gallery/trunk/eval/gx/gallery3/trunk@18408 57fcd75e-5312-0410-8df3-f5eb6fbb1595
Diffstat (limited to 'kohana/core')
29 files changed, 3978 insertions, 0 deletions
// ---------------------------------------------------------------------------
// File: kohana/core/Benchmark.php
// ---------------------------------------------------------------------------
defined('SYSPATH') or die('No direct script access.');
/**
 * Simple benchmarking.
 *
 * Records a wall-clock timestamp and a memory reading at a named start point
 * and again at the matching stop point, and reports the difference.
 *
 * $Id$
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @license    http://kohanaphp.com/license.html
 */
final class Benchmark {

	// Benchmark records, keyed by benchmark name. Each record holds the keys
	// 'start', 'stop', 'memory_start' and 'memory_stop'. This is initialised
	// to an empty array so that get(TRUE) is safe to call before any
	// benchmark has been started.
	private static $marks = array();

	/**
	 * Read the current memory usage.
	 *
	 * @return  integer  bytes reported by memory_get_usage(), or 0 when that
	 *                   function is not available in this PHP build
	 */
	private static function memory()
	{
		return function_exists('memory_get_usage') ? memory_get_usage() : 0;
	}

	/**
	 * Set a benchmark start point. Starting a name that has already been
	 * started is a no-op: the first start point is kept.
	 *
	 * @param   string  benchmark name
	 * @return  void
	 */
	public static function start($name)
	{
		if (isset(self::$marks[$name]))
			return;

		self::$marks[$name] = array
		(
			'start'        => microtime(TRUE),
			'stop'         => FALSE,
			'memory_start' => self::memory(),
			'memory_stop'  => FALSE
		);
	}

	/**
	 * Set a benchmark stop point. Only the first stop of a started benchmark
	 * is recorded; unknown names and repeated stops are ignored.
	 *
	 * @param   string  benchmark name
	 * @return  void
	 */
	public static function stop($name)
	{
		if ( ! isset(self::$marks[$name]) OR self::$marks[$name]['stop'] !== FALSE)
			return;

		self::$marks[$name]['stop'] = microtime(TRUE);
		self::$marks[$name]['memory_stop'] = self::memory();
	}

	/**
	 * Get the elapsed time and memory delta between a start and stop.
	 *
	 * A benchmark that is still running is stopped by this call, so the
	 * reported values do not change between successive reads.
	 *
	 * @param   string   benchmark name, TRUE for all
	 * @param   integer  number of decimal places to count to
	 * @return  array    array('time' => string, 'memory' => integer) for a
	 *                   single name; an array of those keyed by benchmark
	 *                   name when $name is TRUE (empty if none were started)
	 * @return  FALSE    when the named benchmark was never started
	 */
	public static function get($name, $decimals = 4)
	{
		if ($name === TRUE)
		{
			$times = array();

			foreach (array_keys(self::$marks) as $mark)
			{
				// Get each mark recursively
				$times[$mark] = self::get($mark, $decimals);
			}

			return $times;
		}

		if ( ! isset(self::$marks[$name]))
			return FALSE;

		if (self::$marks[$name]['stop'] === FALSE)
		{
			// Stop the benchmark to prevent mis-matched results
			self::stop($name);
		}

		$mark = self::$marks[$name];

		// Return a string version of the time between the start and stop points.
		// Properly reading a float requires using number_format or sprintf.
		return array
		(
			'time'   => number_format($mark['stop'] - $mark['start'], $decimals),
			'memory' => ($mark['memory_stop'] - $mark['memory_start'])
		);
	}

} // End Benchmark

// ---------------------------------------------------------------------------
// File: kohana/core/Bootstrap.php (a separate file in the same commit)
// ---------------------------------------------------------------------------
defined('SYSPATH') or die('No direct script access.');
/**
 * Kohana process control file, loaded by the front controller.
 *
 * Loads the core files, prepares the environment and then runs the system
 * events in order: system.ready, system.routing, system.execute and
 * system.shutdown. The loading and initialization phases are benchmarked.
 *
 * $Id$
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @license    http://kohanaphp.com/license.html
 */

define('KOHANA_VERSION',  '2.2');
define('KOHANA_CODENAME', 'efímera');

// Test if Kohana is running in Windows
define('KOHANA_IS_WIN', PHP_SHLIB_SUFFIX === 'dll');

// Kohana benchmarks are prefixed to prevent collisions
define('SYSTEM_BENCHMARK', 'system_benchmark');

// Load benchmarking support
require SYSPATH.'core/Benchmark'.EXT;

// Start total_execution
Benchmark::start(SYSTEM_BENCHMARK.'_total_execution');

// Start kohana_loading
Benchmark::start(SYSTEM_BENCHMARK.'_kohana_loading');

// Load core files
require SYSPATH.'core/utf8'.EXT;
require SYSPATH.'core/Event'.EXT;
require SYSPATH.'core/Kohana'.EXT;

// Prepare the environment
Kohana::setup();

// End kohana_loading
Benchmark::stop(SYSTEM_BENCHMARK.'_kohana_loading');

// Start system_initialization
Benchmark::start(SYSTEM_BENCHMARK.'_system_initialization');

// Prepare the system
Event::run('system.ready');

// Determine routing
Event::run('system.routing');

// End system_initialization
Benchmark::stop(SYSTEM_BENCHMARK.'_system_initialization');

// Make the magic happen!
Event::run('system.execute');

// Clean up and exit
Event::run('system.shutdown');
/**
 * Process queuing/execution class. Allows an unlimited number of callbacks
 * to be added to 'events'. Events can be run multiple times, and can also
 * process event-specific data. By default, Kohana has several system events.
 *
 * Each event queue is kept as a zero-based list with no gaps, and a callback
 * appears at most once per queue. All callback comparisons are strict (===).
 *
 * File: kohana/core/Event.php
 *
 * $Id$
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @license    http://kohanaphp.com/license.html
 * @link       http://docs.kohanaphp.com/general/events
 */
final class Event {

	// Event callbacks: event name => list of callbacks
	private static $events = array();

	// Cache of events that have been run: event name => event name
	private static $has_run = array();

	// Data that can be processed during events
	public static $data;

	/**
	 * Add a callback to the end of an event queue.
	 *
	 * @param   string   event name
	 * @param   array    http://php.net/callback
	 * @return  boolean  FALSE if the callback is already queued for this event
	 */
	public static function add($name, $callback)
	{
		if ( ! isset(self::$events[$name]))
		{
			// Create an empty event if it is not yet defined
			self::$events[$name] = array();
		}
		elseif (in_array($callback, self::$events[$name], TRUE))
		{
			// The callback is already queued
			return FALSE;
		}

		// Add the callback
		self::$events[$name][] = $callback;

		return TRUE;
	}

	/**
	 * Locate a callback in an event queue using a strict comparison, the same
	 * comparison used by add(), replace() and clear().
	 *
	 * @param   string   event name
	 * @param   array    callback to look for
	 * @return  integer  position of the callback in the queue
	 * @return  FALSE    if the queue is empty or the callback is not queued
	 */
	private static function position($name, $callback)
	{
		if (empty(self::$events[$name]))
			return FALSE;

		return array_search($callback, self::$events[$name], TRUE);
	}

	/**
	 * Add a callback to an event queue, before a given callback. When the
	 * given callback is not queued, the new callback is appended instead.
	 *
	 * @param   string   event name
	 * @param   array    existing event callback
	 * @param   array    event callback
	 * @return  boolean  FALSE if the new callback is already queued
	 */
	public static function add_before($name, $existing, $callback)
	{
		$key = self::position($name, $existing);

		if ($key === FALSE)
		{
			// Nothing to insert before, just append
			return self::add($name, $callback);
		}

		// Insert the callback immediately before the existing callback
		return self::insert_event($name, $key, $callback);
	}

	/**
	 * Add a callback to an event queue, after a given callback. When the
	 * given callback is not queued, the new callback is appended instead.
	 *
	 * @param   string   event name
	 * @param   array    existing event callback
	 * @param   array    event callback
	 * @return  boolean  FALSE if the new callback is already queued
	 */
	public static function add_after($name, $existing, $callback)
	{
		$key = self::position($name, $existing);

		if ($key === FALSE)
		{
			// Nothing to insert after, just append
			return self::add($name, $callback);
		}

		// Insert the callback immediately after the existing callback
		return self::insert_event($name, $key + 1, $callback);
	}

	/**
	 * Inserts a new callback at a specific position in an event queue.
	 *
	 * @param   string   event name
	 * @param   integer  position to insert the new callback at
	 * @param   array    event callback
	 * @return  boolean  FALSE if the callback is already queued
	 */
	private static function insert_event($name, $key, $callback)
	{
		if (in_array($callback, self::$events[$name], TRUE))
			return FALSE;

		// The callback is wrapped in an array so that an array-style callback
		// is inserted as one element rather than being spread into the queue
		array_splice(self::$events[$name], $key, 0, array($callback));

		return TRUE;
	}

	/**
	 * Replaces a callback with another callback. If the new callback is
	 * already queued elsewhere, the existing callback is simply removed so
	 * that the new one is not queued twice.
	 *
	 * @param   string   event name
	 * @param   array    callback to replace
	 * @param   array    new callback
	 * @return  boolean  FALSE if the callback to replace is not queued
	 */
	public static function replace($name, $existing, $callback)
	{
		$key = self::position($name, $existing);

		if ($key === FALSE)
			return FALSE;

		if ($existing === $callback)
		{
			// Replacing a callback with itself must not remove it
			return TRUE;
		}

		if ( ! in_array($callback, self::$events[$name], TRUE))
		{
			// Replace the existing callback with the new callback
			self::$events[$name][$key] = $callback;
		}
		else
		{
			// Remove the existing callback from the queue
			unset(self::$events[$name][$key]);

			// Reset the array so the keys are ordered properly
			self::$events[$name] = array_values(self::$events[$name]);
		}

		return TRUE;
	}

	/**
	 * Get all callbacks for an event.
	 *
	 * @param   string  event name
	 * @return  array   empty when the event has no callbacks
	 */
	public static function get($name)
	{
		return empty(self::$events[$name]) ? array() : self::$events[$name];
	}

	/**
	 * Clear some or all callbacks from an event.
	 *
	 * @param   string  event name
	 * @param   array   specific callback to remove, FALSE for all callbacks
	 * @return  void
	 */
	public static function clear($name, $callback = FALSE)
	{
		if ($callback === FALSE)
		{
			self::$events[$name] = array();
			return;
		}

		if ( ! isset(self::$events[$name]))
			return;

		// Compare each queued callback to the callback requested for removal
		// and drop the ones that match
		foreach (self::$events[$name] as $i => $event_callback)
		{
			if ($callback === $event_callback)
			{
				unset(self::$events[$name][$i]);
			}
		}

		// Close the gap left behind. add_before() and add_after() use queue
		// keys as positions, so the keys must stay sequential.
		self::$events[$name] = array_values(self::$events[$name]);
	}

	/**
	 * Execute all of the callbacks attached to an event. The callbacks are
	 * called without arguments and read or modify the data through
	 * Event::$data, which is bound by reference to $data for the duration of
	 * the run.
	 *
	 * The queue is copied before it is executed, so callbacks added while the
	 * event is running are not called during that run. An event without any
	 * callbacks is not marked as having been run.
	 *
	 * @param   string  event name
	 * @param   array   data can be processed as Event::$data by the callbacks
	 * @return  void
	 */
	public static function run($name, & $data = NULL)
	{
		if (empty(self::$events[$name]))
			return;

		// So callbacks can access Event::$data
		self::$data =& $data;

		foreach (self::get($name) as $callback)
		{
			call_user_func($callback);
		}

		// Re-bind Event::$data to a throwaway variable. This prevents data
		// from getting 'stuck', and later writes cannot reach $data.
		$clear_data = '';
		self::$data =& $clear_data;

		// The event has been run!
		self::$has_run[$name] = $name;
	}

	/**
	 * Check if a given event has been run.
	 *
	 * @param   string   event name
	 * @return  boolean
	 */
	public static function has_run($name)
	{
		return isset(self::$has_run[$name]);
	}

} // End Event
\ No newline at end of file diff --git a/kohana/core/Kohana.php b/kohana/core/Kohana.php new file mode 100644 index 00000000..109853b1 --- /dev/null +++ b/kohana/core/Kohana.php @@ -0,0 +1,1744 @@ +<?php defined('SYSPATH') or die('No direct script access.'); +/** + * Provides Kohana-specific helper functions. This is where the magic happens! + * + * $Id$ + * + * @package Core + * @author Kohana Team + * @copyright (c) 2007-2008 Kohana Team + * @license http://kohanaphp.com/license.html + */ +final class Kohana { + + // The singleton instance of the controller + public static $instance; + + // Output buffering level + private static $buffer_level; + + // Will be set to TRUE when an exception is caught + public static $has_error = FALSE; + + // The final output that will displayed by Kohana + public static $output = ''; + + // The current user agent + public static $user_agent; + + // The current locale + public static $locale; + + // Configuration + private static $configuration; + + // Include paths + private static $include_paths; + + // Logged messages + private static $log; + + // Cache lifetime + private static $cache_lifetime; + + // Log levels + private static $log_levels = array + ( + 'error' => 1, + 'alert' => 2, + 'info' => 3, + 'debug' => 4, + ); + + // Internal caches and write status + private static $internal_cache = array(); + private static $write_cache; + + /** + * Sets up the PHP environment. Adds error/exception handling, output + * buffering, and adds an auto-loading method for loading classes. + * + * This method is run immediately when this file is loaded, and is + * benchmarked as environment_setup. + * + * For security, this function also destroys the $_REQUEST global variable. + * Using the proper global (GET, POST, COOKIE, etc) is inherently more secure. + * The recommended way to fetch a global variable is using the Input library. 
+ * @see http://www.php.net/globals + * + * @return void + */ + public static function setup() + { + static $run; + + // This function can only be run once + if ($run === TRUE) + return; + + // Start the environment setup benchmark + Benchmark::start(SYSTEM_BENCHMARK.'_environment_setup'); + + // Define Kohana error constant + define('E_KOHANA', 42); + + // Define 404 error constant + define('E_PAGE_NOT_FOUND', 43); + + // Define database error constant + define('E_DATABASE_ERROR', 44); + + if (self::$cache_lifetime = self::config('core.internal_cache')) + { + // Load cached configuration and language files + self::$internal_cache['configuration'] = self::cache('configuration', self::$cache_lifetime); + self::$internal_cache['language'] = self::cache('language', self::$cache_lifetime); + + // Load cached file paths + self::$internal_cache['find_file_paths'] = self::cache('find_file_paths', self::$cache_lifetime); + + // Enable cache saving + Event::add('system.shutdown', array(__CLASS__, 'internal_cache_save')); + } + + // Disable notices and "strict" errors + $ER = error_reporting(~E_NOTICE & ~E_STRICT); + + // Set the user agent + self::$user_agent = trim($_SERVER['HTTP_USER_AGENT']); + + if (function_exists('date_default_timezone_set')) + { + $timezone = Kohana::config('locale.timezone'); + + // Set default timezone, due to increased validation of date settings + // which cause massive amounts of E_NOTICEs to be generated in PHP 5.2+ + date_default_timezone_set(empty($timezone) ? 
date_default_timezone_get() : $timezone); + } + + // Restore error reporting + error_reporting($ER); + + // Start output buffering + ob_start(array(__CLASS__, 'output_buffer')); + + // Save buffering level + self::$buffer_level = ob_get_level(); + + // Set autoloader + spl_autoload_register(array('Kohana', 'auto_load')); + + // Set error handler + set_error_handler(array('Kohana', 'exception_handler')); + + // Set exception handler + set_exception_handler(array('Kohana', 'exception_handler')); + + // Send default text/html UTF-8 header + header('Content-Type: text/html; charset=UTF-8'); + + // Load locales + $locales = self::config('locale.language'); + + // Make first locale UTF-8 + $locales[0] .= '.UTF-8'; + + // Set locale information + self::$locale = setlocale(LC_ALL, $locales); + + if (self::$configuration['core']['log_threshold'] > 0) + { + // Set the log directory + self::log_directory(self::$configuration['core']['log_directory']); + + // Enable log writing at shutdown + register_shutdown_function(array(__CLASS__, 'log_save')); + } + + // Enable Kohana routing + Event::add('system.routing', array('Router', 'find_uri')); + Event::add('system.routing', array('Router', 'setup')); + + // Enable Kohana controller initialization + Event::add('system.execute', array('Kohana', 'instance')); + + // Enable Kohana 404 pages + Event::add('system.404', array('Kohana', 'show_404')); + + // Enable Kohana output handling + Event::add('system.shutdown', array('Kohana', 'shutdown')); + + if ($config = Kohana::config('core.enable_hooks')) + { + $hooks = array(); + + if ( ! 
is_array($config)) + { + // All of the hooks are enabled, so we use list_files + $hooks = Kohana::list_files('hooks', TRUE); + } + else + { + // Individual hooks need to be found + foreach ($config as $name) + { + if ($hook = Kohana::find_file('hooks', $name, FALSE)) + { + // Hook was found, add it to loaded hooks + $hooks[] = $hook; + } + else + { + // This should never happen + Kohana::log('error', 'Hook not found: '.$name); + } + } + } + + // Length of extension, for offset + $ext = -(strlen(EXT)); + + foreach ($hooks as $hook) + { + // Validate the filename extension + if (substr($hook, $ext) === EXT) + { + // Hook was found, include it + include $hook; + } + else + { + // This should never happen + Kohana::log('error', 'Hook not found: '.$hook); + } + } + } + + // Setup is complete, prevent it from being run again + $run = TRUE; + + // Stop the environment setup routine + Benchmark::stop(SYSTEM_BENCHMARK.'_environment_setup'); + } + + /** + * Loads the controller and initializes it. Runs the pre_controller, + * post_controller_constructor, and post_controller events. Triggers + * a system.404 event when the route cannot be mapped to a controller. + * + * This method is benchmarked as controller_setup and controller_execution. 
+ * + * @return object instance of controller + */ + public static function & instance() + { + if (self::$instance === NULL) + { + Benchmark::start(SYSTEM_BENCHMARK.'_controller_setup'); + + if (Router::$method[0] === '_') + { + // Do not allow access to hidden methods + Event::run('system.404'); + } + + // Include the Controller file + require Router::$controller_path; + + try + { + // Start validation of the controller + $class = new ReflectionClass(ucfirst(Router::$controller).'_Controller'); + } + catch (ReflectionException $e) + { + // Controller does not exist + Event::run('system.404'); + } + + if (IN_PRODUCTION AND $class->getConstant('ALLOW_PRODUCTION') == FALSE) + { + // Controller is not allowed to run in production + Event::run('system.404'); + } + + // Run system.pre_controller + Event::run('system.pre_controller'); + + // Create a new controller instance + $controller = $class->newInstance(); + + // Controller constructor has been executed + Event::run('system.post_controller_constructor'); + + try + { + // Load the controller method + $method = $class->getMethod(Router::$method); + + if ($method->isProtected() or $method->isPrivate()) + { + // Do not attempt to invoke protected methods + throw new ReflectionException('protected controller method'); + } + + // Default arguments + $arguments = Router::$arguments; + } + catch (ReflectionException $e) + { + // Use __call instead + $method = $class->getMethod('__call'); + + // Use arguments in __call format + $arguments = array(Router::$method, Router::$arguments); + } + + // Stop the controller setup benchmark + Benchmark::stop(SYSTEM_BENCHMARK.'_controller_setup'); + + // Start the controller execution benchmark + Benchmark::start(SYSTEM_BENCHMARK.'_controller_execution'); + + // Execute the controller method + $method->invokeArgs($controller, $arguments); + + // Controller method has been executed + Event::run('system.post_controller'); + + // Stop the controller execution benchmark + 
Benchmark::stop(SYSTEM_BENCHMARK.'_controller_execution'); + } + + return self::$instance; + } + + /** + * Get all include paths. APPPATH is the first path, followed by module + * paths in the order they are configured, follow by the SYSPATH. + * + * @param boolean re-process the include paths + * @return array + */ + public static function include_paths($process = FALSE) + { + if ($process === TRUE) + { + // Add APPPATH as the first path + self::$include_paths = array(APPPATH); + + foreach (self::$configuration['core']['modules'] as $path) + { + if ($path = str_replace('\\', '/', realpath($path))) + { + // Add a valid path + self::$include_paths[] = $path.'/'; + } + } + + // Add SYSPATH as the last path + self::$include_paths[] = SYSPATH; + } + + return self::$include_paths; + } + + /** + * Get a config item or group. + * + * @param string item name + * @param boolean force a forward slash (/) at the end of the item + * @param boolean is the item required? + * @return mixed + */ + public static function config($key, $slash = FALSE, $required = TRUE) + { + if (self::$configuration === NULL) + { + // Load core configuration + self::$configuration['core'] = self::config_load('core'); + + // Re-parse the include paths + self::include_paths(TRUE); + } + + // Get the group name from the key + $group = explode('.', $key, 2); + $group = $group[0]; + + if ( ! isset(self::$configuration[$group])) + { + // Load the configuration group + self::$configuration[$group] = self::config_load($group, $required); + } + + // Get the value of the key string + $value = self::key_string(self::$configuration, $key); + + if ($slash === TRUE AND is_string($value) AND $value !== '') + { + // Force the value to end with "/" + $value = rtrim($value, '/').'/'; + } + + return $value; + } + + /** + * Sets a configuration item, if allowed. 
+ * + * @param string config key string + * @param string config value + * @return boolean + */ + public static function config_set($key, $value) + { + // Do this to make sure that the config array is already loaded + self::config($key); + + if (substr($key, 0, 7) === 'routes.') + { + // Routes cannot contain sub keys due to possible dots in regex + $keys = explode('.', $key, 2); + } + else + { + // Convert dot-noted key string to an array + $keys = explode('.', $key); + } + + // Used for recursion + $conf =& self::$configuration; + $last = count($keys) - 1; + + foreach ($keys as $i => $k) + { + if ($i === $last) + { + $conf[$k] = $value; + } + else + { + $conf =& $conf[$k]; + } + } + + if ($key === 'core.modules') + { + // Reprocess the include paths + self::include_paths(TRUE); + } + + return TRUE; + } + + /** + * Load a config file. + * + * @param string config filename, without extension + * @param boolean is the file required? + * @return array + */ + public static function config_load($name, $required = TRUE) + { + if ($name === 'core') + { + // Load the application configuration file + require APPPATH.'config/config'.EXT; + + if ( ! isset($config['site_domain'])) + { + // Invalid config file + die('Your Kohana application configuration file is not valid.'); + } + + return $config; + } + + if (isset(self::$internal_cache['configuration'][$name])) + return self::$internal_cache['configuration'][$name]; + + // Load matching configs + $configuration = array(); + + if ($files = self::find_file('config', $name, $required)) + { + foreach ($files as $file) + { + require $file; + + if (isset($config) AND is_array($config)) + { + // Merge in configuration + $configuration = array_merge($configuration, $config); + } + } + } + + if ( ! 
isset(self::$write_cache['configuration'])) + { + // Cache has changed + self::$write_cache['configuration'] = TRUE; + } + + return self::$internal_cache['configuration'][$name] = $configuration; + } + + /** + * Clears a config group from the cached configuration. + * + * @param string config group + * @return void + */ + public static function config_clear($group) + { + // Remove the group from config + unset(self::$configuration[$group], self::$internal_cache['configuration'][$group]); + + if ( ! isset(self::$write_cache['configuration'])) + { + // Cache has changed + self::$write_cache['configuration'] = TRUE; + } + } + + /** + * Add a new message to the log. + * + * @param string type of message + * @param string message text + * @return void + */ + public static function log($type, $message) + { + if (self::$log_levels[$type] <= self::$configuration['core']['log_threshold']) + { + self::$log[] = array(date('Y-m-d H:i:s P'), $type, $message); + } + } + + /** + * Save all currently logged messages. + * + * @return void + */ + public static function log_save() + { + if (empty(self::$log)) + return; + + // Filename of the log + $filename = self::log_directory().date('Y-m-d').'.log'.EXT; + + if ( ! is_file($filename)) + { + // Write the SYSPATH checking header + file_put_contents($filename, + '<?php defined(\'SYSPATH\') or die(\'No direct script access.\'); ?>'.PHP_EOL.PHP_EOL); + + // Prevent external writes + chmod($filename, 0644); + } + + // Messages to write + $messages = array(); + + do + { + // Load the next mess + list ($date, $type, $text) = array_shift(self::$log); + + // Add a new message line + $messages[] = $date.' --- '.$type.': '.$text; + } + while ( ! empty(self::$log)); + + // Write messages to log file + file_put_contents($filename, implode(PHP_EOL, $messages).PHP_EOL, FILE_APPEND); + } + + /** + * Get or set the logging directory. 
+ * + * @param string new log directory + * @return string + */ + public static function log_directory($dir = NULL) + { + static $directory; + + if ( ! empty($dir)) + { + // Get the directory path + $dir = realpath($dir); + + if (is_dir($dir) AND is_writable($dir)) + { + // Change the log directory + $directory = str_replace('\\', '/', $dir).'/'; + } + else + { + // Log directory is invalid + throw new Kohana_Exception('core.log_dir_unwritable', $dir); + } + } + + return $directory; + } + + /** + * Load data from a simple cache file. This should only be used internally, + * and is NOT a replacement for the Cache library. + * + * @param string unique name of cache + * @param integer expiration in seconds + * @return mixed + */ + public static function cache($name, $lifetime) + { + if ($lifetime > 0) + { + $path = APPPATH.'cache/kohana_'.$name; + + if (is_file($path)) + { + // Check the file modification time + if ((time() - filemtime($path)) < $lifetime) + { + // Cache is valid + return unserialize(file_get_contents($path)); + } + else + { + // Cache is invalid, delete it + unlink($path); + } + } + } + + // No cache found + return NULL; + } + + /** + * Save data to a simple cache file. This should only be used internally, and + * is NOT a replacement for the Cache library. + * + * @param string cache name + * @param mixed data to cache + * @param integer expiration in seconds + * @return boolean + */ + public static function cache_save($name, $data, $lifetime) + { + if ($lifetime < 1) + return FALSE; + + $path = APPPATH.'cache/kohana_'.$name; + + if ($data === NULL) + { + // Delete cache + return (is_file($path) and unlink($path)); + } + else + { + // Write data to cache file + return (bool) file_put_contents($path, serialize($data)); + } + } + + /** + * Kohana output handler. + * + * @param string current output buffer + * @return string + */ + public static function output_buffer($output) + { + if ( ! 
Event::has_run('system.send_headers')) + { + // Run the send_headers event, specifically for cookies being set + Event::run('system.send_headers'); + } + + // Set final output + self::$output = $output; + + // Set and return the final output + return $output; + } + + /** + * Closes all open output buffers, either by flushing or cleaning all + * open buffers, including the Kohana output buffer. + * + * @param boolean disable to clear buffers, rather than flushing + * @return void + */ + public static function close_buffers($flush = TRUE) + { + if (ob_get_level() >= self::$buffer_level) + { + // Set the close function + $close = ($flush === TRUE) ? 'ob_end_flush' : 'ob_end_clean'; + + while (ob_get_level() > self::$buffer_level) + { + // Flush or clean the buffer + $close(); + } + + // This will flush the Kohana buffer, which sets self::$output + ob_end_clean(); + + // Reset the buffer level + self::$buffer_level = ob_get_level(); + } + } + + /** + * Triggers the shutdown of Kohana by closing the output buffer, runs the system.display event. + * + * @return void + */ + public static function shutdown() + { + // Close output buffers + self::close_buffers(TRUE); + + // Run the output event + Event::run('system.display', self::$output); + + // Render the final output + self::render(self::$output); + } + + /** + * Inserts global Kohana variables into the generated output and prints it. + * + * @param string final output that will displayed + * @return void + */ + public static function render($output) + { + // Fetch memory usage in MB + $memory = function_exists('memory_get_usage') ? 
(memory_get_usage() / 1024 / 1024) : 0; + + // Fetch benchmark for page execution time + $benchmark = Benchmark::get(SYSTEM_BENCHMARK.'_total_execution'); + + if (Kohana::config('core.render_stats') === TRUE) + { + // Replace the global template variables + $output = str_replace( + array + ( + '{kohana_version}', + '{kohana_codename}', + '{execution_time}', + '{memory_usage}', + '{included_files}', + ), + array + ( + KOHANA_VERSION, + KOHANA_CODENAME, + $benchmark['time'], + number_format($memory, 2).'MB', + count(get_included_files()), + ), + $output + ); + } + + if ($level = Kohana::config('core.output_compression') AND ini_get('output_handler') !== 'ob_gzhandler' AND (int) ini_get('zlib.output_compression') === 0) + { + if ($level < 1 OR $level > 9) + { + // Normalize the level to be an integer between 1 and 9. This + // step must be done to prevent gzencode from triggering an error + $level = max(1, min($level, 9)); + } + + if (stripos(@$_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== FALSE) + { + $compress = 'gzip'; + } + elseif (stripos(@$_SERVER['HTTP_ACCEPT_ENCODING'], 'deflate') !== FALSE) + { + $compress = 'deflate'; + } + } + + if (isset($compress) AND $level > 0) + { + switch ($compress) + { + case 'gzip': + // Compress output using gzip + $output = gzencode($output, $level); + break; + case 'deflate': + // Compress output using zlib (HTTP deflate) + $output = gzdeflate($output, $level); + break; + } + + // This header must be sent with compressed content to prevent + // browser caches from breaking + header('Vary: Accept-Encoding'); + + // Send the content encoding header + header('Content-Encoding: '.$compress); + + // Sending Content-Length in CGI can result in unexpected behavior + if (stripos(PHP_SAPI, 'cgi') === FALSE) + { + header('Content-Length: '.strlen($output)); + } + } + + echo $output; + } + + /** + * Displays a 404 page. 
	 *
	 * @throws  Kohana_404_Exception
	 * @param   string  URI of page
	 * @param   string  custom template
	 * @return  void
	 */
	public static function show_404($page = FALSE, $template = FALSE)
	{
		throw new Kohana_404_Exception($page, $template);
	}

	/**
	 * Dual-purpose PHP error and exception handler. Uses the kohana_error_page
	 * view to display the message.
	 *
	 * The call is treated as a PHP error when exactly five arguments were
	 * passed, and as an exception otherwise. After rendering the error view the
	 * system.shutdown event is run (if it has not run yet) and the script exits.
	 *
	 * @param   integer|object  exception object or error code
	 * @param   string          error message
	 * @param   string          filename
	 * @param   integer         line number
	 * @return  void
	 */
	public static function exception_handler($exception, $message = NULL, $file = NULL, $line = NULL)
	{
		// PHP errors have 5 args, always
		$PHP_ERROR = (func_num_args() === 5);

		// Test to see if errors should be displayed
		if ($PHP_ERROR AND (error_reporting() & $exception) === 0)
			return;

		// This is useful for hooks to determine if a page has an error
		self::$has_error = TRUE;

		// Error handling will use exactly 5 args, every time
		if ($PHP_ERROR)
		{
			$code = $exception;
			$type = 'PHP Error';
			$template = 'kohana_error_page';
		}
		else
		{
			$code = $exception->getCode();
			$type = get_class($exception);
			$message = $exception->getMessage();
			$file = $exception->getFile();
			$line = $exception->getLine();
			$template = ($exception instanceof Kohana_Exception) ? $exception->getTemplate() : 'kohana_error_page';
		}

		if (is_numeric($code))
		{
			// Look up the (level, error, description) triple for this code
			$codes = self::lang('errors');

			if ( ! empty($codes[$code]))
			{
				list($level, $error, $description) = $codes[$code];
			}
			else
			{
				$level = 1;
				$error = $PHP_ERROR ? 'Unknown Error' : get_class($exception);
				$description = '';
			}
		}
		else
		{
			// Custom error message, this will never be logged
			$level = 5;
			$error = $code;
			$description = '';
		}

		// Remove the DOCROOT from the path, as a security precaution
		$file = str_replace('\\', '/', realpath($file));
		$file = preg_replace('|^'.preg_quote(DOCROOT).'|', '', $file);

		if ($level >= self::$configuration['core']['log_threshold'])
		{
			// Log the error
			self::log('error', self::lang('core.uncaught_exception', $type, $message, $file, $line));
		}

		if ($PHP_ERROR)
		{
			// PHP errors always use the description of E_RECOVERABLE_ERROR
			$description = self::lang('errors.'.E_RECOVERABLE_ERROR);
			$description = is_array($description) ? $description[2] : '';

			if ( ! headers_sent())
			{
				// Send the 500 header
				header('HTTP/1.1 500 Internal Server Error');
			}
		}
		else
		{
			if (method_exists($exception, 'sendHeaders') AND ! headers_sent())
			{
				// Send the headers if they have not already been sent
				$exception->sendHeaders();
			}
		}

		while (ob_get_level() > self::$buffer_level)
		{
			// Close open buffers
			ob_end_clean();
		}

		// Test if display_errors is on
		if (self::$configuration['core']['display_errors'] === TRUE)
		{
			if ( ! IN_PRODUCTION AND $line != FALSE)
			{
				// Remove the first entry of debug_backtrace(), it is the exception_handler call
				$trace = $PHP_ERROR ? array_slice(debug_backtrace(), 1) : $exception->getTrace();

				// Beautify backtrace
				$trace = self::backtrace($trace);
			}

			// Load the error
			require self::find_file('views', empty($template) ? 'kohana_error_page' : $template);
		}
		else
		{
			// Get the i18n messages
			$error = self::lang('core.generic_error');
			$message = self::lang('core.errors_disabled', url::site(), url::site(Router::$current_uri));

			// Load the errors_disabled view
			require self::find_file('views', 'kohana_error_disabled');
		}

		if ( ! Event::has_run('system.shutdown'))
		{
			// Run the shutdown event to ensure a clean exit
			Event::run('system.shutdown');
		}

		// Turn off error reporting
		error_reporting(0);
		exit;
	}

	/**
	 * Provides class auto-loading.
	 *
	 * The class-name suffix after the last underscore selects the resource
	 * directory (Core, Controller, Model, Driver); any other name is treated
	 * as a library when its first character sorts before 'a', else a helper.
	 *
	 * @throws  Kohana_Exception
	 * @param   string  name of class
	 * @return  bool
	 */
	public static function auto_load($class)
	{
		// Already defined, nothing to load
		if (class_exists($class, FALSE))
			return TRUE;

		if (($suffix = strrpos($class, '_')) > 0)
		{
			// Find the class suffix
			$suffix = substr($class, $suffix + 1);
		}
		else
		{
			// No suffix
			$suffix = FALSE;
		}

		if ($suffix === 'Core')
		{
			$type = 'libraries';
			$file = substr($class, 0, -5);
		}
		elseif ($suffix === 'Controller')
		{
			$type = 'controllers';
			// Lowercase filename
			$file = strtolower(substr($class, 0, -11));
		}
		elseif ($suffix === 'Model')
		{
			$type = 'models';
			// Lowercase filename
			$file = strtolower(substr($class, 0, -6));
		}
		elseif ($suffix === 'Driver')
		{
			$type = 'libraries/drivers';
			$file = str_replace('_', '/', substr($class, 0, -7));
		}
		else
		{
			// This could be either a library or a helper, but libraries must
			// always be capitalized, so we check if the first character is
			// uppercase. If it is, we are loading a library, not a helper.
			$type = ($class[0] < 'a') ? 'libraries' : 'helpers';
			$file = $class;
		}

		if (($filepath = self::find_file($type, $file)) === FALSE)
			return FALSE;

		// Load the file
		require $filepath;

		if ($type === 'libraries' OR $type === 'helpers')
		{
			if ($extension = self::find_file($type, self::$configuration['core']['extension_prefix'].$class))
			{
				// Load the extension
				require $extension;
			}
			elseif ($suffix !== 'Core' AND class_exists($class.'_Core', FALSE))
			{
				// Class extension to be evaluated
				$extension = 'class '.$class.' extends '.$class.'_Core { }';

				// Start class analysis
				$core = new ReflectionClass($class.'_Core');

				if ($core->isAbstract())
				{
					// Make the extension abstract
					$extension = 'abstract '.$extension;
				}

				// Transparent class extensions are handled using eval. This is
				// a disgusting hack, but it gets the job done.
				eval($extension);
			}
		}

		return TRUE;
	}

	/**
	 * Find a resource file in a given directory. Files will be located according
	 * to the order of the include paths. Configuration and i18n files will be
	 * returned in reverse order.
	 *
	 * Results (including misses) are stored in the internal find_file_paths
	 * cache, keyed by the directory/filename/extension search string.
	 *
	 * @throws  Kohana_Exception  if file is required and not found
	 * @param   string   directory to search in
	 * @param   string   filename to look for (including extension only if 4th parameter is TRUE)
	 * @param   boolean  file required
	 * @param   string   file extension
	 * @return  array    if the type is config, i18n or l10n
	 * @return  string   if the file is found
	 * @return  FALSE    if the file is not found
	 */
	public static function find_file($directory, $filename, $required = FALSE, $ext = FALSE)
	{
		// NOTE: This test MUST NOT be a strict comparison (===), or empty
		// extensions will be allowed!
		if ($ext == '')
		{
			// Use the default extension
			$ext = EXT;
		}
		else
		{
			// Add a period before the extension
			$ext = '.'.$ext;
		}

		// Search path
		$search = $directory.'/'.$filename.$ext;

		// Serve repeated lookups from the cache
		if (isset(self::$internal_cache['find_file_paths'][$search]))
			return self::$internal_cache['find_file_paths'][$search];

		// Load include paths
		$paths = self::$include_paths;

		// Nothing found, yet
		$found = NULL;

		if ($directory === 'config' OR $directory === 'i18n')
		{
			// Search in reverse, for merging
			$paths = array_reverse($paths);

			foreach ($paths as $path)
			{
				if (is_file($path.$search))
				{
					// A matching file has been found
					$found[] = $path.$search;
				}
			}
		}
		else
		{
			foreach ($paths as $path)
			{
				if (is_file($path.$search))
				{
					// A matching file has been found
					$found = $path.$search;

					// Stop searching
					break;
				}
			}
		}

		if ($found === NULL)
		{
			if ($required === TRUE)
			{
				// Directory i18n key
				$directory = 'core.'.inflector::singular($directory);

				// If the file is required, throw an exception
				throw new Kohana_Exception('core.resource_not_found', self::lang($directory), $filename);
			}
			else
			{
				// Nothing was found, return FALSE
				$found = FALSE;
			}
		}

		if ( ! isset(self::$write_cache['find_file_paths']))
		{
			// Write cache at shutdown
			self::$write_cache['find_file_paths'] = TRUE;
		}

		return self::$internal_cache['find_file_paths'][$search] = $found;
	}

	/**
	 * Lists all files and directories in a resource path.
	 *
	 * @param   string   directory to search
	 * @param   boolean  list all files to the maximum depth?
+ * @param string full path to search (used for recursion, *never* set this manually) + * @return array filenames and directories + */ + public static function list_files($directory, $recursive = FALSE, $path = FALSE) + { + $files = array(); + + if ($path === FALSE) + { + $paths = array_reverse(Kohana::include_paths()); + + foreach ($paths as $path) + { + // Recursively get and merge all files + $files = array_merge($files, self::list_files($directory, $recursive, $path.$directory)); + } + } + else + { + $path = rtrim($path, '/').'/'; + + if (is_readable($path)) + { + $items = (array) glob($path.'*'); + + foreach ($items as $index => $item) + { + $files[] = $item = str_replace('\\', '/', $item); + + // Handle recursion + if (is_dir($item) AND $recursive == TRUE) + { + // Filename should only be the basename + $item = pathinfo($item, PATHINFO_BASENAME); + + // Append sub-directory search + $files = array_merge($files, self::list_files($directory, TRUE, $path.$item)); + } + } + } + } + + return $files; + } + + /** + * Fetch an i18n language item. + * + * @param string language key to fetch + * @param array additional information to insert into the line + * @return string i18n language string, or the requested key if the i18n item is not found + */ + public static function lang($key, $args = array()) + { + // Extract the main group from the key + $group = explode('.', $key, 2); + $group = $group[0]; + + // Get locale name + $locale = Kohana::config('locale.language.0'); + + if ( ! isset(self::$internal_cache['language'][$locale][$group])) + { + // Messages for this group + $messages = array(); + + if ($files = self::find_file('i18n', $locale.'/'.$group)) + { + foreach ($files as $file) + { + include $file; + + // Merge in configuration + if ( ! empty($lang) AND is_array($lang)) + { + foreach ($lang as $k => $v) + { + $messages[$k] = $v; + } + } + } + } + + if ( ! 
isset(self::$write_cache['language'])) + { + // Write language cache + self::$write_cache['language'] = TRUE; + } + + self::$internal_cache['language'][$locale][$group] = $messages; + } + + // Get the line from cache + $line = self::key_string(self::$internal_cache['language'][$locale], $key); + + if ($line === NULL) + { + Kohana::log('error', 'Missing i18n entry '.$key.' for language '.$locale); + + // Return the key string as fallback + return $key; + } + + if (is_string($line) AND func_num_args() > 1) + { + $args = array_slice(func_get_args(), 1); + + // Add the arguments into the line + $line = vsprintf($line, is_array($args[0]) ? $args[0] : $args); + } + + return $line; + } + + /** + * Returns the value of a key, defined by a 'dot-noted' string, from an array. + * + * @param array array to search + * @param string dot-noted string: foo.bar.baz + * @return string if the key is found + * @return void if the key is not found + */ + public static function key_string($array, $keys) + { + if (empty($array)) + return NULL; + + // Prepare for loop + $keys = explode('.', $keys); + + do + { + // Get the next key + $key = array_shift($keys); + + if (isset($array[$key])) + { + if (is_array($array[$key]) AND ! empty($keys)) + { + // Dig down to prepare the next loop + $array = $array[$key]; + } + else + { + // Requested key was found + return $array[$key]; + } + } + else + { + // Requested key is not set + break; + } + } + while ( ! empty($keys)); + + return NULL; + } + + /** + * Sets values in an array by using a 'dot-noted' string. + * + * @param array array to set keys in (reference) + * @param string dot-noted string: foo.bar.baz + * @return mixed fill value for the key + * @return void + */ + public static function key_string_set( & $array, $keys, $fill = NULL) + { + if (is_object($array) AND ($array instanceof ArrayObject)) + { + // Copy the array + $array_copy = $array->getArrayCopy(); + + // Is an object + $array_object = TRUE; + } + else + { + if ( ! 
is_array($array)) + { + // Must always be an array + $array = (array) $array; + } + + // Copy is a reference to the array + $array_copy =& $array; + } + + if (empty($keys)) + return $array; + + // Create keys + $keys = explode('.', $keys); + + // Create reference to the array + $row =& $array_copy; + + for ($i = 0, $end = count($keys) - 1; $i <= $end; $i++) + { + // Get the current key + $key = $keys[$i]; + + if ( ! isset($row[$key])) + { + if (isset($keys[$i + 1])) + { + // Make the value an array + $row[$key] = array(); + } + else + { + // Add the fill key + $row[$key] = $fill; + } + } + elseif (isset($keys[$i + 1])) + { + // Make the value an array + $row[$key] = (array) $row[$key]; + } + + // Go down a level, creating a new row reference + $row =& $row[$key]; + } + + if (isset($array_object)) + { + // Swap the array back in + $array->exchangeArray($array_copy); + } + } + + /** + * Retrieves current user agent information: + * keys: browser, version, platform, mobile, robot, referrer, languages, charsets + * tests: is_browser, is_mobile, is_robot, accept_lang, accept_charset + * + * @param string key or test name + * @param string used with "accept" tests: user_agent(accept_lang, en) + * @return array languages and charsets + * @return string all other keys + * @return boolean all tests + */ + public static function user_agent($key = 'agent', $compare = NULL) + { + static $info; + + // Return the raw string + if ($key === 'agent') + return Kohana::$user_agent; + + if ($info === NULL) + { + // Parse the user agent and extract basic information + $agents = Kohana::config('user_agents'); + + foreach ($agents as $type => $data) + { + foreach ($data as $agent => $name) + { + if (stripos(Kohana::$user_agent, $agent) !== FALSE) + { + if ($type === 'browser' AND preg_match('|'.preg_quote($agent).'[^0-9.]*+([0-9.][0-9.a-z]*)|i', Kohana::$user_agent, $match)) + { + // Set the browser version + $info['version'] = $match[1]; + } + + // Set the agent name + $info[$type] = 
$name; + break; + } + } + } + } + + if (empty($info[$key])) + { + switch ($key) + { + case 'is_robot': + case 'is_browser': + case 'is_mobile': + // A boolean result + $return = ! empty($info[substr($key, 3)]); + break; + case 'languages': + $return = array(); + if ( ! empty($_SERVER['HTTP_ACCEPT_LANGUAGE'])) + { + if (preg_match_all('/[-a-z]{2,}/', strtolower(trim($_SERVER['HTTP_ACCEPT_LANGUAGE'])), $matches)) + { + // Found a result + $return = $matches[0]; + } + } + break; + case 'charsets': + $return = array(); + if ( ! empty($_SERVER['HTTP_ACCEPT_CHARSET'])) + { + if (preg_match_all('/[-a-z0-9]{2,}/', strtolower(trim($_SERVER['HTTP_ACCEPT_CHARSET'])), $matches)) + { + // Found a result + $return = $matches[0]; + } + } + break; + case 'referrer': + if ( ! empty($_SERVER['HTTP_REFERER'])) + { + // Found a result + $return = trim($_SERVER['HTTP_REFERER']); + } + break; + } + + // Cache the return value + isset($return) and $info[$key] = $return; + } + + if ( ! empty($compare)) + { + // The comparison must always be lowercase + $compare = strtolower($compare); + + switch ($key) + { + case 'accept_lang': + // Check if the lange is accepted + return in_array($compare, Kohana::user_agent('languages')); + break; + case 'accept_charset': + // Check if the charset is accepted + return in_array($compare, Kohana::user_agent('charsets')); + break; + default: + // Invalid comparison + return FALSE; + break; + } + } + + // Return the key, if set + return isset($info[$key]) ? $info[$key] : NULL; + } + + /** + * Quick debugging of any variable. Any number of parameters can be set. + * + * @return string + */ + public static function debug() + { + if (func_num_args() === 0) + return; + + // Get params + $params = func_get_args(); + $output = array(); + + foreach ($params as $var) + { + $output[] = '<pre>('.gettype($var).') '.html::specialchars(print_r($var, TRUE)).'</pre>'; + } + + return implode("\n", $output); + } + + /** + * Displays nice backtrace information. 
+ * @see http://php.net/debug_backtrace + * + * @param array backtrace generated by an exception or debug_backtrace + * @return string + */ + public static function backtrace($trace) + { + if ( ! is_array($trace)) + return; + + // Final output + $output = array(); + + foreach ($trace as $entry) + { + $temp = '<li>'; + + if (isset($entry['file'])) + { + $temp .= Kohana::lang('core.error_file_line', preg_replace('!^'.preg_quote(DOCROOT).'!', '', $entry['file']), $entry['line']); + } + + $temp .= '<pre>'; + + if (isset($entry['class'])) + { + // Add class and call type + $temp .= $entry['class'].$entry['type']; + } + + // Add function + $temp .= $entry['function'].'( '; + + // Add function args + if (isset($entry['args']) AND is_array($entry['args'])) + { + // Separator starts as nothing + $sep = ''; + + while ($arg = array_shift($entry['args'])) + { + if (is_string($arg) AND is_file($arg)) + { + // Remove docroot from filename + $arg = preg_replace('!^'.preg_quote(DOCROOT).'!', '', $arg); + } + + $temp .= $sep.html::specialchars(print_r($arg, TRUE)); + + // Change separator to a comma + $sep = ', '; + } + } + + $temp .= ' )</pre></li>'; + + $output[] = $temp; + } + + return '<ul class="backtrace">'.implode("\n", $output).'</ul>'; + } + + /** + * Saves the internal caches: configuration, include paths, etc. + * + * @return boolean + */ + public static function internal_cache_save() + { + if ( ! is_array(self::$write_cache)) + return FALSE; + + // Get internal cache names + $caches = array_keys(self::$write_cache); + + // Nothing written + $written = FALSE; + + foreach ($caches as $cache) + { + if (isset(self::$internal_cache[$cache])) + { + // Write the cache file + self::cache_save($cache, self::$internal_cache[$cache], self::$configuration['core']['internal_cache']); + + // A cache has been written + $written = TRUE; + } + } + + return $written; + } + +} // End Kohana + +/** + * Creates a generic i18n exception. 
+ */ +class Kohana_Exception extends Exception { + + // Template file + protected $template = 'kohana_error_page'; + + // Header + protected $header = FALSE; + + // Error code + protected $code = E_KOHANA; + + /** + * Set exception message. + * + * @param string i18n language key for the message + * @param array addition line parameters + */ + public function __construct($error) + { + $args = array_slice(func_get_args(), 1); + + // Fetch the error message + $message = Kohana::lang($error, $args); + + if ($message === $error OR empty($message)) + { + // Unable to locate the message for the error + $message = 'Unknown Exception: '.$error; + } + + // Sets $this->message the proper way + parent::__construct($message); + } + + /** + * Magic method for converting an object to a string. + * + * @return string i18n message + */ + public function __toString() + { + return (string) $this->message; + } + + /** + * Fetch the template name. + * + * @return string + */ + public function getTemplate() + { + return $this->template; + } + + /** + * Sends an Internal Server Error header. + * + * @return void + */ + public function sendHeaders() + { + // Send the 500 header + header('HTTP/1.1 500 Internal Server Error'); + } + +} // End Kohana Exception + +/** + * Creates a custom exception. + */ +class Kohana_User_Exception extends Kohana_Exception { + + /** + * Set exception title and message. + * + * @param string exception title string + * @param string exception message string + * @param string custom error template + */ + public function __construct($title, $message, $template = FALSE) + { + Exception::__construct($message); + + $this->code = $title; + + if ($template !== FALSE) + { + $this->template = $template; + } + } + +} // End Kohana PHP Exception + +/** + * Creates a Page Not Found exception. + */ +class Kohana_404_Exception extends Kohana_Exception { + + protected $code = E_PAGE_NOT_FOUND; + + /** + * Set internal properties. 
+ * + * @param string URL of page + * @param string custom error template + */ + public function __construct($page = FALSE, $template = FALSE) + { + if ($page === FALSE) + { + // Construct the page URI using Router properties + $page = Router::$current_uri.Router::$url_suffix.Router::$query_string; + } + + Exception::__construct(Kohana::lang('core.page_not_found', $page)); + + $this->template = $template; + } + + /** + * Sends "File Not Found" headers, to emulate server behavior. + * + * @return void + */ + public function sendHeaders() + { + // Send the 404 header + header('HTTP/1.1 404 File Not Found'); + } + +} // End Kohana 404 Exception diff --git a/kohana/core/utf8.php b/kohana/core/utf8.php new file mode 100644 index 00000000..e102da4e --- /dev/null +++ b/kohana/core/utf8.php @@ -0,0 +1,743 @@ +<?php defined('SYSPATH') or die('No direct script access.'); +/** + * A port of phputf8 to a unified file/class. Checks PHP status to ensure that + * UTF-8 support is available and normalize global variables to UTF-8. It also + * provides multi-byte aware replacement string functions. + * + * This file is licensed differently from the rest of Kohana. As a port of + * phputf8, which is LGPL software, this file is released under the LGPL. + * + * PCRE needs to be compiled with UTF-8 support (--enable-utf8). + * Support for Unicode properties is highly recommended (--enable-unicode-properties). + * @see http://php.net/manual/reference.pcre.pattern.modifiers.php + * + * UTF-8 conversion will be much more reliable if the iconv extension is loaded. + * @see http://php.net/iconv + * + * The mbstring extension is highly recommended, but must not be overloading + * string functions. + * @see http://php.net/mbstring + * + * $Id$ + * + * @package Core + * @author Kohana Team + * @copyright (c) 2007 Kohana Team + * @copyright (c) 2005 Harry Fuecks + * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt + */ + +if ( ! 
preg_match('/^.$/u', 'ñ')) +{ + trigger_error + ( + '<a href="http://php.net/pcre">PCRE</a> has not been compiled with UTF-8 support. '. + 'See <a href="http://php.net/manual/reference.pcre.pattern.modifiers.php">PCRE Pattern Modifiers</a> '. + 'for more information. This application cannot be run without UTF-8 support.', + E_USER_ERROR + ); +} + +if ( ! extension_loaded('iconv')) +{ + trigger_error + ( + 'The <a href="http://php.net/iconv">iconv</a> extension is not loaded. '. + 'Without iconv, strings cannot be properly translated to UTF-8 from user input. '. + 'This application cannot be run without UTF-8 support.', + E_USER_ERROR + ); +} + +if (extension_loaded('mbstring') AND (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)) +{ + trigger_error + ( + 'The <a href="http://php.net/mbstring">mbstring</a> extension is overloading PHP\'s native string functions. '. + 'Disable this by setting mbstring.func_overload to 0, 1, 4 or 5 in php.ini or a .htaccess file.'. + 'This application cannot be run without UTF-8 support.', + E_USER_ERROR + ); +} + +// Check PCRE support for Unicode properties such as \p and \X. +$ER = error_reporting(0); +define('PCRE_UNICODE_PROPERTIES', (bool) preg_match('/^\pL$/u', 'ñ')); +error_reporting($ER); + +// SERVER_UTF8 ? use mb_* functions : use non-native functions +if (extension_loaded('mbstring')) +{ + mb_internal_encoding('UTF-8'); + define('SERVER_UTF8', TRUE); +} +else +{ + define('SERVER_UTF8', FALSE); +} + +// Convert all global variables to UTF-8. +$_GET = utf8::clean($_GET); +$_POST = utf8::clean($_POST); +$_COOKIE = utf8::clean($_COOKIE); +$_SERVER = utf8::clean($_SERVER); + +if (PHP_SAPI == 'cli') +{ + // Convert command line arguments + $_SERVER['argv'] = utf8::clean($_SERVER['argv']); +} + +final class utf8 { + + // Called methods + static $called = array(); + + /** + * Recursively cleans arrays, objects, and strings. 
Removes ASCII control
	 * codes and converts to UTF-8 while silently discarding incompatible
	 * UTF-8 characters.
	 *
	 * Array keys are cleaned as well as values. For objects only the values
	 * of the properties visible to foreach are cleaned.
	 *
	 * @param   mixed  string, array or object to clean
	 * @return  mixed  the cleaned value, of the same type as the input
	 */
	public static function clean($str)
	{
		if (is_array($str) OR $str instanceof ArrayAccess)
		{
			foreach ($str as $key => $val)
			{
				// Recursion!
				$str[self::clean($key)] = self::clean($val);
			}
		}
		elseif (is_object($str))
		{
			// Array syntax on a plain object is a fatal error, so write the
			// cleaned value back through the property instead
			foreach ($str as $key => $val)
			{
				$str->$key = self::clean($val);
			}
		}
		elseif (is_string($str) AND $str !== '')
		{
			// Remove control characters
			$str = self::strip_ascii_ctrl($str);

			if ( ! self::is_ascii($str))
			{
				// Disable notices
				$ER = error_reporting(~E_NOTICE);

				// iconv is expensive, so it is only used when needed
				$str = iconv('UTF-8', 'UTF-8//IGNORE', $str);

				// Turn notices back on
				error_reporting($ER);
			}
		}

		return $str;
	}

	/**
	 * Tests whether a string contains only 7bit ASCII bytes. This is used to
	 * determine when to use native functions or UTF-8 functions.
	 *
	 * @param   string  string to check
	 * @return  bool
	 */
	public static function is_ascii($str)
	{
		return ! preg_match('/[^\x00-\x7F]/S', $str);
	}

	/**
	 * Strips out device control codes in the ASCII range.
	 *
	 * @param   string  string to clean
	 * @return  string
	 */
	public static function strip_ascii_ctrl($str)
	{
		return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
	}

	/**
	 * Strips out all non-7bit ASCII bytes.
	 *
	 * @param   string  string to clean
	 * @return  string
	 */
	public static function strip_non_ascii($str)
	{
		return preg_replace('/[^\x00-\x7F]+/S', '', $str);
	}

	/**
	 * Replaces special/accented UTF-8 characters by ASCII-7 'equivalents'.
	 *
	 * @author  Andreas Gohr <andi@splitbrain.org>
	 *
	 * @param   string   string to transliterate
	 * @param   integer  -1 lowercase only, +1 uppercase only, 0 both cases
	 * @return  string
	 */
	public static function transliterate_to_ascii($str, $case = 0)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _transliterate_to_ascii($str, $case);
	}

	/**
	 * Returns the length of the given string.
	 * @see http://php.net/strlen
	 *
	 * @param   string   string being measured for length
	 * @return  integer
	 */
	public static function strlen($str)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strlen($str);
	}

	/**
	 * Finds position of first occurrence of a UTF-8 string.
	 * @see http://php.net/strpos
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string   haystack
	 * @param   string   needle
	 * @param   integer  offset from which character in haystack to start searching
	 * @return  integer  position of needle
	 * @return  boolean  FALSE if the needle is not found
	 */
	public static function strpos($str, $search, $offset = 0)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strpos($str, $search, $offset);
	}

	/**
	 * Finds position of last occurrence of a char in a UTF-8 string.
	 * @see http://php.net/strrpos
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string   haystack
	 * @param   string   needle
	 * @param   integer  offset from which character in haystack to start searching
	 * @return  integer  position of needle
	 * @return  boolean  FALSE if the needle is not found
	 */
	public static function strrpos($str, $search, $offset = 0)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strrpos($str, $search, $offset);
	}

	/**
	 * Returns part of a UTF-8 string.
	 * @see http://php.net/substr
	 *
	 * @author  Chris Smith <chris@jalakai.co.uk>
	 *
	 * @param   string   input string
	 * @param   integer  offset
	 * @param   integer  length limit
	 * @return  string
	 */
	public static function substr($str, $offset, $length = NULL)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _substr($str, $offset, $length);
	}

	/**
	 * Replaces text within a portion of a UTF-8 string.
	 * @see http://php.net/substr_replace
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string   input string
	 * @param   string   replacement string
	 * @param   integer  offset
	 * @param   integer  length
	 * @return  string
	 */
	public static function substr_replace($str, $replacement, $offset, $length = NULL)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _substr_replace($str, $replacement, $offset, $length);
	}

	/**
	 * Makes a UTF-8 string lowercase.
	 * @see http://php.net/strtolower
	 *
	 * @author  Andreas Gohr <andi@splitbrain.org>
	 *
	 * @param   string  mixed case string
	 * @return  string
	 */
	public static function strtolower($str)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strtolower($str);
	}

	/**
	 * Makes a UTF-8 string uppercase.
	 * @see http://php.net/strtoupper
	 *
	 * @author  Andreas Gohr <andi@splitbrain.org>
	 *
	 * @param   string  mixed case string
	 * @return  string
	 */
	public static function strtoupper($str)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strtoupper($str);
	}

	/**
	 * Makes a UTF-8 string's first character uppercase.
	 * @see http://php.net/ucfirst
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string  mixed case string
	 * @return  string
	 */
	public static function ucfirst($str)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _ucfirst($str);
	}

	/**
	 * Makes the first character of every word in a UTF-8 string uppercase.
	 * @see http://php.net/ucwords
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string  mixed case string
	 * @return  string
	 */
	public static function ucwords($str)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _ucwords($str);
	}

	/**
	 * Case-insensitive UTF-8 string comparison.
	 * @see http://php.net/strcasecmp
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string   string to compare
	 * @param   string   string to compare
	 * @return  integer  less than 0 if str1 is less than str2
	 * @return  integer  greater than 0 if str1 is greater than str2
	 * @return  integer  0 if they are equal
	 */
	public static function strcasecmp($str1, $str2)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _strcasecmp($str1, $str2);
	}

	/**
	 * Returns a string or an array with all occurrences of search in subject
	 * (ignoring case) replaced with the given replace value.
	 * @see http://php.net/str_ireplace
	 *
	 * @note    It's not fast and gets slower if $search and/or $replace are arrays.
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string|array  text to replace
	 * @param   string|array  replacement text
	 * @param   string|array  subject text
	 * @param   integer       number of matched and replaced needles will be returned via this parameter which is passed by reference
	 * @return  string        if the input was a string
	 * @return  array         if the input was an array
	 */
	public static function str_ireplace($search, $replace, $str, & $count = NULL)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _str_ireplace($search, $replace, $str, $count);
	}

	/**
	 * Case-insensitive UTF-8 version of strstr. Returns all of input string
	 * from the first occurrence of needle to the end.
	 * @see http://php.net/stristr
	 *
	 * @author  Harry Fuecks <hfuecks@gmail.com>
	 *
	 * @param   string   input string
	 * @param   string   needle
	 * @return  string   matched substring if found
	 * @return  boolean  FALSE if the substring was not found
	 */
	public static function stristr($str, $search)
	{
		if ( ! isset(self::$called[__FUNCTION__]))
		{
			require SYSPATH.'core/utf8/'.__FUNCTION__.EXT;

			// Function has been called
			self::$called[__FUNCTION__] = TRUE;
		}

		return _stristr($str, $search);
	}

	/**
	 * Finds the length of the initial segment matching mask.
+ * @see http://php.net/strspn + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string input string + * @param string mask for search + * @param integer start position of the string to examine + * @param integer length of the string to examine + * @return integer length of the initial segment that contains characters in the mask + */ + public static function strspn($str, $mask, $offset = NULL, $length = NULL) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _strspn($str, $mask, $offset, $length); + } + + /** + * Finds the length of the initial segment not matching mask. + * @see http://php.net/strcspn + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string input string + * @param string mask for search + * @param integer start position of the string to examine + * @param integer length of the string to examine + * @return integer length of the initial segment that contains characters not in the mask + */ + public static function strcspn($str, $mask, $offset = NULL, $length = NULL) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _strcspn($str, $mask, $offset, $length); + } + + /** + * Pads a UTF-8 string to a certain length with another string. + * @see http://php.net/str_pad + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string input string + * @param integer desired string length after padding + * @param string string to use as padding + * @param string padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH + * @return string + */ + public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT) + { + if ( ! 
isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _str_pad($str, $final_str_length, $pad_str, $pad_type); + } + + /** + * Converts a UTF-8 string to an array. + * @see http://php.net/str_split + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string input string + * @param integer maximum length of each chunk + * @return array + */ + public static function str_split($str, $split_length = 1) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _str_split($str, $split_length); + } + + /** + * Reverses a UTF-8 string. + * @see http://php.net/strrev + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string string to be reversed + * @return string + */ + public static function strrev($str) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _strrev($str); + } + + /** + * Strips whitespace (or other UTF-8 characters) from the beginning and + * end of a string. + * @see http://php.net/trim + * + * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string input string + * @param string string of characters to remove + * @return string + */ + public static function trim($str, $charlist = NULL) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _trim($str, $charlist); + } + + /** + * Strips whitespace (or other UTF-8 characters) from the beginning of a string. 
+ * @see http://php.net/ltrim + * + * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string input string + * @param string string of characters to remove + * @return string + */ + public static function ltrim($str, $charlist = NULL) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _ltrim($str, $charlist); + } + + /** + * Strips whitespace (or other UTF-8 characters) from the end of a string. + * @see http://php.net/rtrim + * + * @author Andreas Gohr <andi@splitbrain.org> + * + * @param string input string + * @param string string of characters to remove + * @return string + */ + public static function rtrim($str, $charlist = NULL) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _rtrim($str, $charlist); + } + + /** + * Returns the unicode ordinal for a character. + * @see http://php.net/ord + * + * @author Harry Fuecks <hfuecks@gmail.com> + * + * @param string UTF-8 encoded character + * @return integer + */ + public static function ord($chr) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _ord($chr); + } + + /** + * Takes an UTF-8 string and returns an array of ints representing the Unicode characters. + * Astral planes are supported i.e. the ints in the output can be > 0xFFFF. + * Occurrances of the BOM are ignored. Surrogates are not allowed. + * + * The Original Code is Mozilla Communicator client code. + * The Initial Developer of the Original Code is Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer. 
+ * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/. + * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>. + * + * @param string UTF-8 encoded string + * @return array unicode code points + * @return boolean FALSE if the string is invalid + */ + public static function to_unicode($str) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _to_unicode($str); + } + + /** + * Takes an array of ints representing the Unicode characters and returns a UTF-8 string. + * Astral planes are supported i.e. the ints in the input can be > 0xFFFF. + * Occurrances of the BOM are ignored. Surrogates are not allowed. + * + * The Original Code is Mozilla Communicator client code. + * The Initial Developer of the Original Code is Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer. + * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/. + * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>. + * + * @param array unicode code points representing a string + * @return string utf8 string of characters + * @return boolean FALSE if a code point cannot be found + */ + public static function from_unicode($arr) + { + if ( ! isset(self::$called[__FUNCTION__])) + { + require SYSPATH.'core/utf8/'.__FUNCTION__.EXT; + + // Function has been called + self::$called[__FUNCTION__] = TRUE; + } + + return _from_unicode($arr); + } + +} // End utf8
/**
 * utf8::from_unicode
 *
 * Encodes a list of Unicode code points as a UTF-8 string. Byte order marks
 * (0xFEFF) are dropped from the output. A surrogate (0xD800-0xDFFF) or a
 * value outside 0-0x10FFFF raises an E_USER_WARNING and aborts the conversion.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   array    unicode code points representing a string
 * @return  string   UTF-8 encoded string
 * @return  boolean  FALSE if a code point cannot be encoded
 */
function _from_unicode($arr)
{
	// Build the result in a plain string instead of an output buffer, so the
	// error returns below cannot leave a dangling ob_start() level behind.
	$str = '';

	foreach ($arr as $k => $code)
	{
		// Negative values used to fall into the 2 byte branch and produce garbage
		if ($code < 0 OR $code > 0x10ffff)
		{
			trigger_error('utf8::from_unicode: Codepoint out of Unicode range at index: '.$k.', value: '.$code, E_USER_WARNING);
			return FALSE;
		}

		// ASCII range (including control chars)
		if ($code <= 0x007f)
		{
			$str .= chr($code);
		}
		// 2 byte sequence
		elseif ($code <= 0x07ff)
		{
			$str .= chr(0xc0 | ($code >> 6))
				.chr(0x80 | ($code & 0x003f));
		}
		// Byte order mark (skip)
		elseif ($code == 0xFEFF)
		{
			continue;
		}
		// Surrogates are only meaningful in UTF-16 and may not be encoded
		elseif ($code >= 0xD800 AND $code <= 0xDFFF)
		{
			trigger_error('utf8::from_unicode: Illegal surrogate at index: '.$k.', value: '.$code, E_USER_WARNING);
			return FALSE;
		}
		// 3 byte sequence
		elseif ($code <= 0xffff)
		{
			$str .= chr(0xe0 | ($code >> 12))
				.chr(0x80 | (($code >> 6) & 0x003f))
				.chr(0x80 | ($code & 0x003f));
		}
		// 4 byte sequence (upper bound already checked above)
		else
		{
			$str .= chr(0xf0 | ($code >> 18))
				.chr(0x80 | (($code >> 12) & 0x3f))
				.chr(0x80 | (($code >> 6) & 0x3f))
				.chr(0x80 | ($code & 0x3f));
		}
	}

	return $str;
}
/**
 * utf8::ltrim
 *
 * Strips whitespace, or the characters listed in $charlist, from the start
 * of a string. Native ltrim() is used unless $charlist holds non-ASCII
 * characters, in which case a /u regex does the stripping.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  input string
 * @param   string  characters to remove, NULL for default whitespace
 * @return  string
 */
function _ltrim($str, $charlist = NULL)
{
	if ($charlist === NULL)
	{
		return ltrim($str);
	}

	if ( ! utf8::is_ascii($charlist))
	{
		// Escape characters that are special inside a regex character class
		$escaped = preg_replace('#[-\[\]:\\\\^/]#', '\\\\$0', $charlist);
		$pattern = '/^['.$escaped.']+/u';

		return preg_replace($pattern, '', $str);
	}

	return ltrim($str, $charlist);
}
/**
 * utf8::ord
 *
 * Returns the code point encoded by the first UTF-8 sequence in $chr.
 * The lead byte decides how many bytes are read (1 to 6); continuation
 * bytes are not validated. A truncated sequence or a lead byte of 254/255
 * raises an E_USER_WARNING and yields FALSE.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   UTF-8 encoded character
 * @return  integer  code point
 * @return  boolean  FALSE on a short or invalid sequence
 */
function _ord($chr)
{
	$ord0 = ord($chr);

	// Single byte (ASCII)
	if ($ord0 >= 0 AND $ord0 <= 127)
	{
		return $ord0;
	}

	if ( ! isset($chr[1]))
	{
		trigger_error('Short sequence - at least 2 bytes expected, only 1 seen', E_USER_WARNING);
		return FALSE;
	}

	$ord1 = ord($chr[1]);

	// 2 byte sequence
	if ($ord0 >= 192 AND $ord0 <= 223)
	{
		return ($ord0 - 192) * 64 + ($ord1 - 128);
	}

	if ( ! isset($chr[2]))
	{
		trigger_error('Short sequence - at least 3 bytes expected, only 2 seen', E_USER_WARNING);
		return FALSE;
	}

	$ord2 = ord($chr[2]);

	// 3 byte sequence
	if ($ord0 >= 224 AND $ord0 <= 239)
	{
		return ($ord0 - 224) * 4096 + ($ord1 - 128) * 64 + ($ord2 - 128);
	}

	if ( ! isset($chr[3]))
	{
		trigger_error('Short sequence - at least 4 bytes expected, only 3 seen', E_USER_WARNING);
		return FALSE;
	}

	$ord3 = ord($chr[3]);

	// 4 byte sequence
	if ($ord0 >= 240 AND $ord0 <= 247)
	{
		return ($ord0 - 240) * 262144 + ($ord1 - 128) * 4096 + ($ord2 - 128) * 64 + ($ord3 - 128);
	}

	if ( ! isset($chr[4]))
	{
		trigger_error('Short sequence - at least 5 bytes expected, only 4 seen', E_USER_WARNING);
		return FALSE;
	}

	$ord4 = ord($chr[4]);

	// 5 byte sequence
	if ($ord0 >= 248 AND $ord0 <= 251)
	{
		return ($ord0 - 248) * 16777216 + ($ord1 - 128) * 262144 + ($ord2 - 128) * 4096 + ($ord3 - 128) * 64 + ($ord4 - 128);
	}

	if ( ! isset($chr[5]))
	{
		trigger_error('Short sequence - at least 6 bytes expected, only 5 seen', E_USER_WARNING);
		return FALSE;
	}

	// The last byte was previously read from an undefined variable ($c)
	$ord5 = ord($chr[5]);

	// 6 byte sequence
	if ($ord0 >= 252 AND $ord0 <= 253)
	{
		return ($ord0 - 252) * 1073741824 + ($ord1 - 128) * 16777216 + ($ord2 - 128) * 262144 + ($ord3 - 128) * 4096 + ($ord4 - 128) * 64 + ($ord5 - 128);
	}

	if ($ord0 >= 254 AND $ord0 <= 255)
	{
		trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0, E_USER_WARNING);
		return FALSE;
	}
}
/**
 * utf8::rtrim
 *
 * Strips whitespace, or the characters listed in $charlist, from the end
 * of a string. Native rtrim() is used unless $charlist holds non-ASCII
 * characters, in which case a /u regex does the stripping.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  input string
 * @param   string  characters to remove, NULL for default whitespace
 * @return  string
 */
function _rtrim($str, $charlist = NULL)
{
	if ($charlist === NULL)
	{
		return rtrim($str);
	}

	if ( ! utf8::is_ascii($charlist))
	{
		// Escape characters that are special inside a regex character class
		$escaped = preg_replace('#[-\[\]:\\\\^/]#', '\\\\$0', $charlist);
		$pattern = '/['.$escaped.']++$/uD';

		return preg_replace($pattern, '', $str);
	}

	return rtrim($str, $charlist);
}
/**
 * utf8::str_ireplace
 *
 * Case-insensitive replace for UTF-8 strings. Pure ASCII input is handed to
 * native str_ireplace(). Array subjects and array searches are unrolled into
 * recursive utf8::str_ireplace() calls. A scalar search is located in a
 * lower-cased copy of the subject and replaced at the same byte offset in
 * the original.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string|array  text to replace
 * @param   string|array  replacement text
 * @param   string|array  subject text
 * @param   integer       number of matched and replaced needles will be added to this variable
 * @return  string|array
 */
function _str_ireplace($search, $replace, $str, & $count = NULL)
{
	if (utf8::is_ascii($search) AND utf8::is_ascii($replace) AND utf8::is_ascii($str))
		return str_ireplace($search, $replace, $str, $count);

	if (is_array($str))
	{
		foreach ($str as $key => $val)
		{
			$str[$key] = utf8::str_ireplace($search, $replace, $val, $count);
		}
		return $str;
	}

	if (is_array($search))
	{
		$keys = array_keys($search);

		foreach ($keys as $k)
		{
			if (is_array($replace))
			{
				if (array_key_exists($k, $replace))
				{
					$str = utf8::str_ireplace($search[$k], $replace[$k], $str, $count);
				}
				else
				{
					// No matching replacement: the needle is removed
					$str = utf8::str_ireplace($search[$k], '', $str, $count);
				}
			}
			else
			{
				$str = utf8::str_ireplace($search[$k], $replace, $str, $count);
			}
		}
		return $str;
	}

	$search = utf8::strtolower($search);
	$str_lower = utf8::strtolower($str);

	$total_matched_strlen = 0;
	$i = 0;

	// An empty needle would make the lazy pattern below match '' forever;
	// native str_ireplace() leaves the subject untouched in that case.
	if ($search != '')
	{
		$search_len = strlen($search);

		// Every replacement already made moves later matches by this many bytes
		$shift = strlen($replace) - $search_len;

		while (preg_match('/(.*?)'.preg_quote($search, '/').'/s', $str_lower, $matches))
		{
			$matched_strlen = strlen($matches[0]);
			$str_lower = substr($str_lower, $matched_strlen);

			// Offset in the original subject, corrected for earlier replacements.
			// NOTE(review): assumes lower-casing keeps byte lengths unchanged.
			$offset = $total_matched_strlen + strlen($matches[1]) + ($i * $shift);
			$str = substr_replace($str, $replace, $offset, $search_len);

			$total_matched_strlen += $matched_strlen;
			$i++;
		}
	}

	$count += $i;
	return $str;
}
/**
 * utf8::str_pad
 *
 * Pads a UTF-8 string to $final_str_length characters. Pure ASCII input is
 * handed to native str_pad(). The string is returned unchanged when it is
 * already long enough.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   integer  desired string length after padding
 * @param   string   string to use as padding
 * @param   string   padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
 * @return  string
 */
function _str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
	if (utf8::is_ascii($str) AND utf8::is_ascii($pad_str))
	{
		return str_pad($str, $final_str_length, $pad_str, $pad_type);
	}

	$str_length = utf8::strlen($str);

	if ($final_str_length <= 0 OR $final_str_length <= $str_length)
	{
		return $str;
	}

	$pad_str_length = utf8::strlen($pad_str);

	// Guard the divisions below: there is nothing to pad with
	if ($pad_str_length < 1)
	{
		trigger_error('utf8::str_pad: Padding string cannot be empty', E_USER_WARNING);
		return $str;
	}

	$pad_length = $final_str_length - $str_length;

	if ($pad_type == STR_PAD_RIGHT)
	{
		$repeat = (int) ceil($pad_length / $pad_str_length);
		return utf8::substr($str.str_repeat($pad_str, $repeat), 0, $final_str_length);
	}

	if ($pad_type == STR_PAD_LEFT)
	{
		$repeat = (int) ceil($pad_length / $pad_str_length);
		return utf8::substr(str_repeat($pad_str, $repeat), 0, $pad_length).$str;
	}

	if ($pad_type == STR_PAD_BOTH)
	{
		// An odd number of pad characters puts the extra one on the right
		$pad_length_left = (int) floor($pad_length / 2);
		$pad_length_right = (int) ceil($pad_length / 2);
		$repeat_left = (int) ceil($pad_length_left / $pad_str_length);
		$repeat_right = (int) ceil($pad_length_right / $pad_str_length);

		$pad_left = utf8::substr(str_repeat($pad_str, $repeat_left), 0, $pad_length_left);
		// The right pad was previously cut to the left length, leaving the result one short
		$pad_right = utf8::substr(str_repeat($pad_str, $repeat_right), 0, $pad_length_right);
		return $pad_left.$str.$pad_right;
	}

	trigger_error('utf8::str_pad: Unknown padding type (' . $pad_type . ')', E_USER_ERROR);
}
/**
 * utf8::str_split
 *
 * Splits a UTF-8 string into chunks of $split_length characters. Pure ASCII
 * input is handed to native str_split().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   integer  maximum length of each chunk
 * @return  array
 * @return  boolean  FALSE when the chunk length is below 1
 */
function _str_split($str, $split_length = 1)
{
	$chunk = (int) $split_length;

	if (utf8::is_ascii($str))
		return str_split($str, $chunk);

	if ($chunk < 1)
		return FALSE;

	// The whole string fits into a single chunk
	if (utf8::strlen($str) <= $chunk)
		return array($str);

	// Full-size chunks first, then whatever shorter remainder ends the string
	$pattern = '/.{'.$chunk.'}|[^\x00]{1,'.$chunk.'}$/us';
	preg_match_all($pattern, $str, $pieces);

	return $pieces[0];
}
/**
 * utf8::strcasecmp
 *
 * Case-insensitive comparison of two UTF-8 strings. Pure ASCII input is
 * handed to native strcasecmp(); otherwise both strings are lower-cased
 * with utf8::strtolower() and compared with strcmp().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   string to compare
 * @param   string   string to compare
 * @return  integer  less than 0, 0, or greater than 0, as returned by the native comparison
 */
function _strcasecmp($str1, $str2)
{
	$ascii_only = (utf8::is_ascii($str1) AND utf8::is_ascii($str2));

	if ($ascii_only)
	{
		return strcasecmp($str1, $str2);
	}

	return strcmp(utf8::strtolower($str1), utf8::strtolower($str2));
}
/**
 * utf8::strcspn
 *
 * Finds the length, in characters, of the initial segment of $str that
 * contains no character from $mask. Pure ASCII input is handed to native
 * strcspn().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   string   mask for search
 * @param   integer  start position of the string to examine
 * @param   integer  length of the string to examine
 * @return  integer  length of the initial segment that contains characters not in the mask
 */
function _strcspn($str, $mask, $offset = NULL, $length = NULL)
{
	if ($str == '' OR $mask == '')
		return 0;

	if (utf8::is_ascii($str) AND utf8::is_ascii($mask))
		return ($offset === NULL) ? strcspn($str, $mask) : (($length === NULL) ? strcspn($str, $mask, $offset) : strcspn($str, $mask, $offset, $length));

	// Narrow the subject first (this used to test an undefined $start)
	if ($offset !== NULL OR $length !== NULL)
	{
		$str = utf8::substr($str, $offset, $length);
	}

	// Escape these characters:  - [ ] . : \ ^ /
	// The . and : are escaped to prevent possible warnings about POSIX regex elements
	$mask = preg_replace('#[-[\].:\\\\^/]#', '\\\\$0', $mask);
	preg_match('/^[^'.$mask.']+/u', $str, $matches);

	return isset($matches[0]) ? utf8::strlen($matches[0]) : 0;
}
/**
 * utf8::stristr
 *
 * Case-insensitive UTF-8 version of strstr(): returns the part of $str
 * from the first occurrence of $search to the end. Pure ASCII input is
 * handed to native stristr().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   string   needle
 * @return  string   matched substring if found
 * @return  boolean  FALSE if the substring was not found
 */
function _stristr($str, $search)
{
	if (utf8::is_ascii($str) AND utf8::is_ascii($search))
		return stristr($str, $search);

	if ($search == '')
		return $str;

	$str_lower = utf8::strtolower($str);
	$search_lower = utf8::strtolower($search);

	// Both sides must be lower-cased; matching the raw needle against the
	// lower-cased subject missed every needle containing an upper-case letter
	preg_match('/^(.*?)'.preg_quote($search_lower, '/').'/s', $str_lower, $matches);

	// The byte length of the lower-cased prefix locates the match in the original.
	// NOTE(review): assumes lower-casing keeps byte lengths unchanged.
	if (isset($matches[1]))
		return substr($str, strlen($matches[1]));

	return FALSE;
}
/**
 * utf8::strlen
 *
 * Returns the length of a UTF-8 string in characters.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   string being measured for length
 * @return  integer
 */
function _strlen($str)
{
	// mb_strlen() goes first: it beats is_ascii() followed by strlen()
	if (SERVER_UTF8)
	{
		return mb_strlen($str);
	}

	if ( ! utf8::is_ascii($str))
	{
		// Measure the byte length of the utf8_decode()d copy
		$str = utf8_decode($str);
	}

	return strlen($str);
}
/**
 * utf8::strpos
 *
 * Finds the character position of the first occurrence of $search in a
 * UTF-8 string. Uses mb_strpos() when SERVER_UTF8 is set and native
 * strpos() for pure ASCII input.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   haystack
 * @param   string   needle
 * @param   integer  offset from which character in haystack to start searching
 * @return  integer  position of needle
 * @return  boolean  FALSE if the needle is not found
 */
function _strpos($str, $search, $offset = 0)
{
	$offset = (int) $offset;

	if (SERVER_UTF8)
	{
		return mb_strpos($str, $search, $offset);
	}

	if (utf8::is_ascii($str) AND utf8::is_ascii($search))
	{
		return strpos($str, $search, $offset);
	}

	if ($offset != 0)
	{
		// Search the tail only, then shift the hit back by the skipped characters
		$tail = utf8::substr($str, $offset);
		$found = utf8::strpos($tail, $search);

		if ($found === FALSE)
			return FALSE;

		return $found + $offset;
	}

	// Everything before the first needle is the prefix; its length is the position
	$parts = explode($search, $str, 2);

	if ( ! isset($parts[1]))
		return FALSE;

	return utf8::strlen($parts[0]);
}
/**
 * utf8::strrev
 *
 * Reverses a UTF-8 string character by character. Pure ASCII input is
 * handed to native strrev().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  string to be reversed
 * @return  string
 */
function _strrev($str)
{
	if ( ! utf8::is_ascii($str))
	{
		// Split into whole characters (not bytes) before reversing
		preg_match_all('/./us', $str, $chars);
		$reversed = array_reverse($chars[0]);

		return implode('', $reversed);
	}

	return strrev($str);
}
/**
 * utf8::strrpos
 *
 * Finds the character position of the last occurrence of $search in a
 * UTF-8 string. Uses mb_strrpos() when SERVER_UTF8 is set and native
 * strrpos() for pure ASCII input.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   haystack
 * @param   string   needle
 * @param   integer  offset from which character in haystack to start searching
 * @return  integer  position of needle
 * @return  boolean  FALSE if the needle is not found
 */
function _strrpos($str, $search, $offset = 0)
{
	$offset = (int) $offset;

	if (SERVER_UTF8)
	{
		return mb_strrpos($str, $search, $offset);
	}

	if (utf8::is_ascii($str) AND utf8::is_ascii($search))
	{
		return strrpos($str, $search, $offset);
	}

	if ($offset != 0)
	{
		// Search the tail only, then shift the hit back by the skipped characters
		$tail = utf8::substr($str, $offset);
		$found = utf8::strrpos($tail, $search);

		if ($found === FALSE)
			return FALSE;

		return $found + $offset;
	}

	// Drop the piece after the last needle; what remains, re-joined, is the prefix
	$parts = explode($search, $str, -1);

	if ( ! isset($parts[0]))
		return FALSE;

	return utf8::strlen(implode($search, $parts));
}
/**
 * utf8::strspn
 *
 * Finds the length, in characters, of the initial segment of $str that
 * consists only of characters from $mask. Pure ASCII input is handed to
 * native strspn().
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   string   mask for search
 * @param   integer  start position of the string to examine
 * @param   integer  length of the string to examine
 * @return  integer  length of the initial segment that contains characters in the mask
 */
function _strspn($str, $mask, $offset = NULL, $length = NULL)
{
	if ($str == '' OR $mask == '')
		return 0;

	if (utf8::is_ascii($str) AND utf8::is_ascii($mask))
		return ($offset === NULL) ? strspn($str, $mask) : (($length === NULL) ? strspn($str, $mask, $offset) : strspn($str, $mask, $offset, $length));

	if ($offset !== NULL OR $length !== NULL)
	{
		$str = utf8::substr($str, $offset, $length);
	}

	// Escape these characters:  - [ ] . : \ ^ /
	// The . and : are escaped to prevent possible warnings about POSIX regex elements
	$mask = preg_replace('#[-[\].:\\\\^/]#', '\\\\$0', $mask);

	// Positive character class: the negated form ([^...]) is strcspn() and
	// made the two functions return the same value
	preg_match('/^['.$mask.']+/u', $str, $matches);

	return isset($matches[0]) ? utf8::strlen($matches[0]) : 0;
}
/**
 * utf8::strtolower
 *
 * Lower-cases a UTF-8 string. Uses mb_strtolower() when SERVER_UTF8 is set
 * and native strtolower() for pure ASCII input; otherwise each code point is
 * looked up in a static upper-to-lower table.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   mixed case string
 * @return  string
 * @return  boolean  FALSE when utf8::to_unicode() rejects the input
 */
function _strtolower($str)
{
	if (SERVER_UTF8)
	{
		return mb_strtolower($str);
	}

	if (utf8::is_ascii($str))
	{
		return strtolower($str);
	}

	// Built once per request, on first non-ASCII use
	static $UTF8_UPPER_TO_LOWER = NULL;

	if ($UTF8_UPPER_TO_LOWER === NULL)
	{
		$UTF8_UPPER_TO_LOWER = array(
			0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062,
			0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101,
			0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3,
			0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C,
			0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F,
			0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F,
			0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3,
			0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B,
			0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9,
			0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D,
			0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4,
			0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165,
			0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157,
			0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119,
			0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129,
			0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448,
			0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075,
			0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A,
			0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC,
			0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0,
			0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D,
			0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0,
			0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5,
			0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA,
			0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065,
			0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F,
			0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068,
			0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6,
			0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457,
			0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5,
			0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6,
			0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071,
			0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458,
			0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE,
			0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127,
			0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C,
			0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F,
			0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB,
			0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441,
			0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B,
			0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103,
			0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9,
			0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123,
		);
	}

	$points = utf8::to_unicode($str);

	if ($points === FALSE)
	{
		return FALSE;
	}

	// Swap every code point that has a lower-case counterpart
	$total = count($points);
	$pos = 0;

	while ($pos < $total)
	{
		if (isset($UTF8_UPPER_TO_LOWER[$points[$pos]]))
		{
			$points[$pos] = $UTF8_UPPER_TO_LOWER[$points[$pos]];
		}

		$pos++;
	}

	return utf8::from_unicode($points);
}
/**
 * utf8::strtoupper
 *
 * Upper-cases a UTF-8 string. Uses mb_strtoupper() when SERVER_UTF8 is set
 * and native strtoupper() for pure ASCII input; otherwise each code point is
 * looked up in a static lower-to-upper table.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   mixed case string
 * @return  string
 * @return  boolean  FALSE when utf8::to_unicode() rejects the input
 */
function _strtoupper($str)
{
	if (SERVER_UTF8)
	{
		return mb_strtoupper($str);
	}

	if (utf8::is_ascii($str))
	{
		return strtoupper($str);
	}

	// Built once per request, on first non-ASCII use
	static $UTF8_LOWER_TO_UPPER = NULL;

	if ($UTF8_LOWER_TO_UPPER === NULL)
	{
		$UTF8_LOWER_TO_UPPER = array(
			0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
			0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
			0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
			0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
			0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
			0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
			0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
			0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
			0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
			0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
			0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
			0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
			0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
			0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
			0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
			0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
			0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
			0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
			0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
			0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
			0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
			0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
			0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
			0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
			0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
			0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
			0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
			0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
			0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
			0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
			0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
			0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
			0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
			0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
			0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
			0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
			0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
			0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
			0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
			0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
			0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
			0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
			0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
		);
	}

	$points = utf8::to_unicode($str);

	if ($points === FALSE)
	{
		return FALSE;
	}

	// Swap every code point that has an upper-case counterpart
	$total = count($points);
	$pos = 0;

	while ($pos < $total)
	{
		if (isset($UTF8_LOWER_TO_UPPER[$points[$pos]]))
		{
			$points[$pos] = $UTF8_LOWER_TO_UPPER[$points[$pos]];
		}

		$pos++;
	}

	return utf8::from_unicode($points);
}
/**
 * utf8::substr
 *
 * Returns part of a UTF-8 string, with offset and length counted in
 * characters. Uses mb_substr() when SERVER_UTF8 is set and native substr()
 * for pure ASCII input; otherwise the slice is captured with a regex built
 * from the offset and length.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   integer  offset
 * @param   integer  length limit
 * @return  string
 */
function _substr($str, $offset, $length = NULL)
{
	if (SERVER_UTF8)
	{
		if ($length === NULL)
			return mb_substr($str, $offset);

		return mb_substr($str, $offset, $length);
	}

	if (utf8::is_ascii($str))
	{
		if ($length === NULL)
			return substr($str, $offset);

		return substr($str, $offset, $length);
	}

	// Normalize params
	$str = (string) $str;
	$strlen = utf8::strlen($str);

	if ($offset < 0)
	{
		// Count a negative offset from the end, clamped to the start
		$offset = max(0, $strlen + $offset);
	}

	if ($length !== NULL)
	{
		$length = (int) $length;
	}

	// Nothing can be selected
	if ($length === 0 OR $offset >= $strlen OR ($length < 0 AND $length <= $offset - $strlen))
	{
		return '';
	}

	// The request covers the whole string
	if ($offset == 0 AND ($length === NULL OR $length >= $strlen))
	{
		return $str;
	}

	$regex = '^';

	// Skip $offset characters. PCRE repeating quantifiers must be less than
	// 65536, so large counts are expressed as repeated 65535-wide groups.
	if ($offset > 0)
	{
		$blocks = (int) ($offset / 65535);
		$rest = (int) ($offset % 65535);

		if ($blocks != 0)
		{
			$regex .= '(?:.{65535}){'.$blocks.'}';
		}

		if ($rest != 0)
		{
			$regex .= '.{'.$rest.'}';
		}
	}

	if ($length === NULL)
	{
		// No length set, capture everything that is left
		$regex .= '(.*)';
	}
	elseif ($length > 0)
	{
		// Positive length: capture a fixed number of characters, never past the end
		$length = min($strlen - $offset, $length);

		$blocks = (int) ($length / 65535);
		$rest = (int) ($length % 65535);

		$regex .= '(';

		if ($blocks != 0)
		{
			$regex .= '(?:.{65535}){'.$blocks.'}';
		}

		$regex .= '.{'.$rest.'})';
	}
	else
	{
		// Negative length: capture everything except the trailing characters
		$blocks = (int) (-$length / 65535);
		$rest = (int) (-$length % 65535);

		$regex .= '(.*)';

		if ($blocks != 0)
		{
			$regex .= '(?:.{65535}){'.$blocks.'}';
		}

		$regex .= '.{'.$rest.'}';
	}

	preg_match('/'.$regex.'/us', $str, $matches);
	return $matches[1];
}
/**
 * utf8::substr_replace
 *
 * Replaces a character range of a UTF-8 string with another string.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   input string
 * @param   string   replacement string
 * @param   integer  offset, in characters
 * @param   integer  number of characters to replace, NULL for "up to the end"
 * @return  string
 */
function _substr_replace($str, $replacement, $offset, $length = NULL)
{
	// An ASCII-only subject can be handled by the native byte-based function
	if (utf8::is_ascii($str))
	{
		if ($length === NULL)
			return substr_replace($str, $replacement, $offset);

		return substr_replace($str, $replacement, $offset, $length);
	}

	// Without an explicit length, everything from the offset onwards is replaced
	if ($length === NULL)
	{
		$length = utf8::strlen($str);
	}
	else
	{
		$length = (int) $length;
	}

	// Break both strings up into lists of single characters
	preg_match_all('/./us', $str, $subject_chars);
	preg_match_all('/./us', $replacement, $insert_chars);

	// Swap the requested range for the replacement characters, then glue it back together
	array_splice($subject_chars[0], $offset, $length, $insert_chars[0]);

	return implode('', $subject_chars[0]);
}
/**
 * utf8::to_unicode
 *
 * Decodes a UTF-8 byte string into an array of Unicode codepoints, validating
 * the encoding along the way. Byte order marks (U+FEFF) are dropped from the output.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  UTF-8 encoded string
 * @return  array   list of integer codepoints
 * @return  FALSE   if the string is not valid UTF-8 (an E_USER_WARNING is triggered)
 */
function _to_unicode($str)
{
	$mState = 0; // cached expected number of octets after the current octet until the beginning of the next UTF8 character sequence
	$mUcs4  = 0; // cached Unicode character
	$mBytes = 1; // cached expected number of octets in the current sequence

	$out = array();

	$len = strlen($str);

	for ($i = 0; $i < $len; $i++)
	{
		$in = ord($str[$i]);

		if ($mState == 0)
		{
			// When mState is zero we expect either a US-ASCII character or a
			// multi-octet sequence.
			if (0 == (0x80 & $in))
			{
				// US-ASCII, pass straight through.
				$out[] = $in;
				$mBytes = 1;
			}
			elseif (0xC0 == (0xE0 & $in))
			{
				// First octet of 2 octet sequence
				$mUcs4 = ($in & 0x1F) << 6;
				$mState = 1;
				$mBytes = 2;
			}
			elseif (0xE0 == (0xF0 & $in))
			{
				// First octet of 3 octet sequence
				$mUcs4 = ($in & 0x0F) << 12;
				$mState = 2;
				$mBytes = 3;
			}
			elseif (0xF0 == (0xF8 & $in))
			{
				// First octet of 4 octet sequence
				$mUcs4 = ($in & 0x07) << 18;
				$mState = 3;
				$mBytes = 4;
			}
			elseif (0xF8 == (0xFC & $in))
			{
				// First octet of 5 octet sequence.
				//
				// This is illegal because the encoded codepoint must be either
				// (a) not the shortest form or
				// (b) outside the Unicode range of 0-0x10FFFF.
				// Rather than trying to resynchronize, we will carry on until the end
				// of the sequence and let the later error handling code catch it.
				$mUcs4 = ($in & 0x03) << 24;
				$mState = 4;
				$mBytes = 5;
			}
			elseif (0xFC == (0xFE & $in))
			{
				// First octet of 6 octet sequence, see comments for 5 octet sequence.
				$mUcs4 = ($in & 1) << 30;
				$mState = 5;
				$mBytes = 6;
			}
			else
			{
				// Current octet is neither in the US-ASCII range nor a legal first octet of a multi-octet sequence.
				trigger_error('utf8::to_unicode: Illegal sequence identifier in UTF-8 at byte '.$i, E_USER_WARNING);
				return FALSE;
			}
		}
		else
		{
			// When mState is non-zero, we expect a continuation of the multi-octet sequence
			if (0x80 == (0xC0 & $in))
			{
				// Legal continuation: merge its six payload bits into place
				$shift = ($mState - 1) * 6;
				$mUcs4 |= ($in & 0x0000003F) << $shift;

				// End of the multi-octet sequence. mUcs4 now contains the final Unicode codepoint to be output
				if (0 == --$mState)
				{
					// Check for illegal sequences and codepoints

					// From Unicode 3.1, non-shortest form is illegal
					if (((2 == $mBytes) AND ($mUcs4 < 0x0080)) OR
						((3 == $mBytes) AND ($mUcs4 < 0x0800)) OR
						((4 == $mBytes) AND ($mUcs4 < 0x10000)) OR
						(4 < $mBytes) OR
						// From Unicode 3.2, surrogate characters are illegal
						(($mUcs4 & 0xFFFFF800) == 0xD800) OR
						// Codepoints outside the Unicode range are illegal
						($mUcs4 > 0x10FFFF))
					{
						trigger_error('utf8::to_unicode: Illegal sequence or codepoint in UTF-8 at byte '.$i, E_USER_WARNING);
						return FALSE;
					}

					if (0xFEFF != $mUcs4)
					{
						// BOM is legal but we don't want to output it
						$out[] = $mUcs4;
					}

					// Initialize UTF-8 cache
					$mState = 0;
					$mUcs4  = 0;
					$mBytes = 1;
				}
			}
			else
			{
				// ((0xC0 & (*in) != 0x80) AND (mState != 0))
				// Incomplete multi-octet sequence
				trigger_error('utf8::to_unicode: Incomplete multi-octet sequence in UTF-8 at byte '.$i, E_USER_WARNING);
				return FALSE;
			}
		}
	}

	// The input ran out while continuation octets were still expected: the final
	// character is truncated, which is just as invalid as a broken sequence in the
	// middle of the string.
	if ($mState != 0)
	{
		trigger_error('utf8::to_unicode: Incomplete multi-octet sequence in UTF-8 at byte '.$len, E_USER_WARNING);
		return FALSE;
	}

	return $out;
}
/**
 * utf8::transliterate_to_ascii
 *
 * Replaces accented UTF-8 characters with plain ASCII equivalents.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string   string to transliterate
 * @param   integer  below zero: lower case accents only, above zero: upper case
 *                   accents only, zero: both
 * @return  string
 */
function _transliterate_to_ascii($str, $case = 0)
{
	// Replacement tables, built on first use and kept for subsequent calls
	static $lower_map = NULL;
	static $upper_map = NULL;

	// Lower case accents
	if ($case <= 0)
	{
		if ($lower_map === NULL)
		{
			$lower_map = array(
				'à' => 'a',  'ô' => 'o',  'ď' => 'd',  'ḟ' => 'f',  'ë' => 'e',  'š' => 's',  'ơ' => 'o',
				'ß' => 'ss', 'ă' => 'a',  'ř' => 'r',  'ț' => 't',  'ň' => 'n',  'ā' => 'a',  'ķ' => 'k',
				'ŝ' => 's',  'ỳ' => 'y',  'ņ' => 'n',  'ĺ' => 'l',  'ħ' => 'h',  'ṗ' => 'p',  'ó' => 'o',
				'ú' => 'u',  'ě' => 'e',  'é' => 'e',  'ç' => 'c',  'ẁ' => 'w',  'ċ' => 'c',  'õ' => 'o',
				'ṡ' => 's',  'ø' => 'o',  'ģ' => 'g',  'ŧ' => 't',  'ș' => 's',  'ė' => 'e',  'ĉ' => 'c',
				'ś' => 's',  'î' => 'i',  'ű' => 'u',  'ć' => 'c',  'ę' => 'e',  'ŵ' => 'w',  'ṫ' => 't',
				'ū' => 'u',  'č' => 'c',  'ö' => 'o',  'è' => 'e',  'ŷ' => 'y',  'ą' => 'a',  'ł' => 'l',
				'ų' => 'u',  'ů' => 'u',  'ş' => 's',  'ğ' => 'g',  'ļ' => 'l',  'ƒ' => 'f',  'ž' => 'z',
				'ẃ' => 'w',  'ḃ' => 'b',  'å' => 'a',  'ì' => 'i',  'ï' => 'i',  'ḋ' => 'd',  'ť' => 't',
				'ŗ' => 'r',  'ä' => 'a',  'í' => 'i',  'ŕ' => 'r',  'ê' => 'e',  'ü' => 'u',  'ò' => 'o',
				'ē' => 'e',  'ñ' => 'n',  'ń' => 'n',  'ĥ' => 'h',  'ĝ' => 'g',  'đ' => 'd',  'ĵ' => 'j',
				'ÿ' => 'y',  'ũ' => 'u',  'ŭ' => 'u',  'ư' => 'u',  'ţ' => 't',  'ý' => 'y',  'ő' => 'o',
				'â' => 'a',  'ľ' => 'l',  'ẅ' => 'w',  'ż' => 'z',  'ī' => 'i',  'ã' => 'a',  'ġ' => 'g',
				'ṁ' => 'm',  'ō' => 'o',  'ĩ' => 'i',  'ù' => 'u',  'į' => 'i',  'ź' => 'z',  'á' => 'a',
				'û' => 'u',  'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',  'ĕ' => 'e',
			);
		}

		$search  = array_keys($lower_map);
		$replace = array_values($lower_map);
		$str     = str_replace($search, $replace, $str);
	}

	// Upper case accents
	if ($case >= 0)
	{
		if ($upper_map === NULL)
		{
			$upper_map = array(
				'À' => 'A',  'Ô' => 'O',  'Ď' => 'D',  'Ḟ' => 'F',  'Ë' => 'E',  'Š' => 'S',  'Ơ' => 'O',
				'Ă' => 'A',  'Ř' => 'R',  'Ț' => 'T',  'Ň' => 'N',  'Ā' => 'A',  'Ķ' => 'K',  'Ĕ' => 'E',
				'Ŝ' => 'S',  'Ỳ' => 'Y',  'Ņ' => 'N',  'Ĺ' => 'L',  'Ħ' => 'H',  'Ṗ' => 'P',  'Ó' => 'O',
				'Ú' => 'U',  'Ě' => 'E',  'É' => 'E',  'Ç' => 'C',  'Ẁ' => 'W',  'Ċ' => 'C',  'Õ' => 'O',
				'Ṡ' => 'S',  'Ø' => 'O',  'Ģ' => 'G',  'Ŧ' => 'T',  'Ș' => 'S',  'Ė' => 'E',  'Ĉ' => 'C',
				'Ś' => 'S',  'Î' => 'I',  'Ű' => 'U',  'Ć' => 'C',  'Ę' => 'E',  'Ŵ' => 'W',  'Ṫ' => 'T',
				'Ū' => 'U',  'Č' => 'C',  'Ö' => 'O',  'È' => 'E',  'Ŷ' => 'Y',  'Ą' => 'A',  'Ł' => 'L',
				'Ų' => 'U',  'Ů' => 'U',  'Ş' => 'S',  'Ğ' => 'G',  'Ļ' => 'L',  'Ƒ' => 'F',  'Ž' => 'Z',
				'Ẃ' => 'W',  'Ḃ' => 'B',  'Å' => 'A',  'Ì' => 'I',  'Ï' => 'I',  'Ḋ' => 'D',  'Ť' => 'T',
				'Ŗ' => 'R',  'Ä' => 'A',  'Í' => 'I',  'Ŕ' => 'R',  'Ê' => 'E',  'Ü' => 'U',  'Ò' => 'O',
				'Ē' => 'E',  'Ñ' => 'N',  'Ń' => 'N',  'Ĥ' => 'H',  'Ĝ' => 'G',  'Đ' => 'D',  'Ĵ' => 'J',
				'Ÿ' => 'Y',  'Ũ' => 'U',  'Ŭ' => 'U',  'Ư' => 'U',  'Ţ' => 'T',  'Ý' => 'Y',  'Ő' => 'O',
				'Â' => 'A',  'Ľ' => 'L',  'Ẅ' => 'W',  'Ż' => 'Z',  'Ī' => 'I',  'Ã' => 'A',  'Ġ' => 'G',
				'Ṁ' => 'M',  'Ō' => 'O',  'Ĩ' => 'I',  'Ù' => 'U',  'Į' => 'I',  'Ź' => 'Z',  'Á' => 'A',
				'Û' => 'U',  'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
			);
		}

		$search  = array_keys($upper_map);
		$replace = array_values($upper_map);
		$str     = str_replace($search, $replace, $str);
	}

	return $str;
}
/**
 * utf8::trim
 *
 * Strips whitespace, or the given characters, from both ends of a string.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  input string
 * @param   string  characters to strip, NULL for the native trim() defaults
 * @return  string
 */
function _trim($str, $charlist = NULL)
{
	if ($charlist !== NULL)
	{
		// Custom character list: strip the right side first, then the left
		$right_trimmed = utf8::rtrim($str, $charlist);

		return utf8::ltrim($right_trimmed, $charlist);
	}

	// No character list: the native function does the job
	return trim($str);
}
/**
 * utf8::ucfirst
 *
 * Makes the first character of a UTF-8 string upper case.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  mixed case string
 * @return  string
 */
function _ucfirst($str)
{
	if ( ! utf8::is_ascii($str))
	{
		// Split into the first character (group 1) and everything after it (group 2)
		preg_match('/^(.?)(.*)$/us', $str, $parts);

		return utf8::strtoupper($parts[1]).$parts[2];
	}

	// ASCII-only strings are safe for the native function
	return ucfirst($str);
}
/**
 * utf8::ucwords
 *
 * Makes the first character of every word in a UTF-8 string upper case.
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007 Kohana Team
 * @copyright  (c) 2005 Harry Fuecks
 * @license    http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
 *
 * @param   string  mixed case string
 * @return  string
 */
function _ucwords($str)
{
	// NOTE(review): SERVER_UTF8 is defined outside this file; presumably it signals
	// that the mbstring functions can be used directly - confirm against the bootstrap.
	if (SERVER_UTF8)
		return mb_convert_case($str, MB_CASE_TITLE);

	if (utf8::is_ascii($str))
		return ucwords($str);

	// [\x0c\x09\x0b\x0a\x0d\x20] matches form feeds, horizontal tabs, vertical tabs, linefeeds,
	// carriage returns and spaces.
	// This corresponds to the definition of a 'word' defined at http://php.net/ucwords
	//
	// A callback is used instead of the /e (eval) modifier: /e escaped quotes in the
	// matched text before evaluating it, which corrupted words starting with a double
	// quote, and current PHP versions no longer accept the modifier at all.
	return preg_replace_callback(
		'/(?<=^|[\x0c\x09\x0b\x0a\x0d\x20])[^\x0c\x09\x0b\x0a\x0d\x20]/u',
		'_ucwords_upper',
		$str
	);
}

/**
 * Callback for _ucwords(): upper-cases the matched first character of a word.
 *
 * @param   array   match data, element 0 holds the matched character
 * @return  string
 */
function _ucwords_upper($matches)
{
	return utf8::strtoupper($matches[0]);
}
\ No newline at end of file |