From 13f8a0dd7baf07ebddc72c1764b5ab13f3c9f42c Mon Sep 17 00:00:00 2001 From: Bharat Mediratta Date: Tue, 5 May 2009 03:30:49 +0000 Subject: Detect input encodings from EXIF/IPTC data and convert to utf-8 as appropriate. This allows us to switch the exif value column back to varchar and improves the way that we deal with non-utf8 data in our embedded EXIF/IPTC data. --- modules/exif/helpers/exif.php | 17 +++++++++++------ modules/exif/helpers/exif_installer.php | 5 +---- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'modules/exif/helpers') diff --git a/modules/exif/helpers/exif.php b/modules/exif/helpers/exif.php index 9bdbc172..25d9eadd 100644 --- a/modules/exif/helpers/exif.php +++ b/modules/exif/helpers/exif.php @@ -37,10 +37,11 @@ class exif_Core { if (isset($exif_raw['ValidEXIFData'])) { foreach(self::_keys() as $field => $exifvar) { if (isset($exif_raw[$exifvar[0]][$exifvar[1]])) { - $data[] = sprintf( - "(%d, '%s', '%s')", - $item->id, $db->escape_str($field), - $db->escape_str($exif_raw[$exifvar[0]][$exifvar[1]])); + $value = $exif_raw[$exifvar[0]][$exifvar[1]]; + if (mb_detect_encoding($value) != "UTF-8") { + $value = utf8_encode($value); + } + $data[] = sprintf("(%d, '%s', '%s')", $item->id, $field, $db->escape_str($value)); } } } @@ -49,10 +50,14 @@ class exif_Core { $iptc = iptcparse($info["APP13"]); foreach (array("Keywords" => "2#025", "Caption" => "2#120") as $keyword => $iptc_key) { if (!empty($iptc[$iptc_key])) { + $value = implode(" ", $iptc[$iptc_key]); + if (mb_detect_encoding($value) != "UTF-8") { + $value = utf8_encode($value); + } $data[] = sprintf( "(%d, '%s', '%s')", - $item->id, $db->escape_str($keyword), - $db->escape_str(implode(" ", $iptc[$iptc_key]))); + $item->id, $keyword, + $db->escape_str($value)); } } } diff --git a/modules/exif/helpers/exif_installer.php b/modules/exif/helpers/exif_installer.php index c5c0b027..4cd1a6b2 100644 --- a/modules/exif/helpers/exif_installer.php +++ b/modules/exif/helpers/exif_installer.php @@ -23,14 +23,11 @@ class exif_installer { if ($version == 0) { $db = Database::instance(); - // we use blob as the data type for the value column because occasionally there's binary - // data embedded in the caption field for photos. This preserves the binary data at the - // expense of natural language collation (which we don't use anyway). $db->query("CREATE TABLE IF NOT EXISTS {exif_keys} ( `id` int(9) NOT NULL auto_increment, `item_id` int(9) NOT NULL, `name` varchar(64) NOT NULL, - `value` varbinary(1536) NOT NULL, + `value` varchar(1536) NOT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;"); $db->query("CREATE TABLE IF NOT EXISTS {exif_records} ( -- cgit v1.2.3