diff options
author | Bharat Mediratta <bharat@menalto.com> | 2009-05-05 03:30:49 +0000 |
---|---|---|
committer | Bharat Mediratta <bharat@menalto.com> | 2009-05-05 03:30:49 +0000 |
commit | 13f8a0dd7baf07ebddc72c1764b5ab13f3c9f42c (patch) | |
tree | b0d5a2f006c1dd9a528c4eb7eca2e428b40566ac /modules | |
parent | 2434feabeb9244eabe61d906eafce7adb518319e (diff) |
Detect the input encoding of EXIF/IPTC data and convert it to UTF-8 where
necessary. This allows us to switch the exif value column back to
varchar and improves the way we handle non-UTF-8 text embedded in
EXIF/IPTC data.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/exif/helpers/exif.php | 17 | ||||
-rw-r--r-- | modules/exif/helpers/exif_installer.php | 5 | ||||
-rw-r--r-- | modules/tag/helpers/tag_event.php | 5 |
3 files changed, 16 insertions, 11 deletions
diff --git a/modules/exif/helpers/exif.php b/modules/exif/helpers/exif.php index 9bdbc172..25d9eadd 100644 --- a/modules/exif/helpers/exif.php +++ b/modules/exif/helpers/exif.php @@ -37,10 +37,11 @@ class exif_Core { if (isset($exif_raw['ValidEXIFData'])) { foreach(self::_keys() as $field => $exifvar) { if (isset($exif_raw[$exifvar[0]][$exifvar[1]])) { - $data[] = sprintf( - "(%d, '%s', '%s')", - $item->id, $db->escape_str($field), - $db->escape_str($exif_raw[$exifvar[0]][$exifvar[1]])); + $value = $exif_raw[$exifvar[0]][$exifvar[1]]; + if (mb_detect_encoding($value) != "UTF-8") { + $value = utf8_encode($value); + } + $data[] = sprintf("(%d, '%s', '%s')", $item->id, $field, $db->escape_str($value)); } } } @@ -49,10 +50,14 @@ class exif_Core { $iptc = iptcparse($info["APP13"]); foreach (array("Keywords" => "2#025", "Caption" => "2#120") as $keyword => $iptc_key) { if (!empty($iptc[$iptc_key])) { + $value = implode(" ", $iptc[$iptc_key]); + if (mb_detect_encoding($value) != "UTF-8") { + $value = utf8_encode($value); + } $data[] = sprintf( "(%d, '%s', '%s')", - $item->id, $db->escape_str($keyword), - $db->escape_str(implode(" ", $iptc[$iptc_key]))); + $item->id, $keyword, + $db->escape_str($value)); } } } diff --git a/modules/exif/helpers/exif_installer.php b/modules/exif/helpers/exif_installer.php index c5c0b027..4cd1a6b2 100644 --- a/modules/exif/helpers/exif_installer.php +++ b/modules/exif/helpers/exif_installer.php @@ -23,14 +23,11 @@ class exif_installer { if ($version == 0) { $db = Database::instance(); - // we use blob as the data type for the value column because occasionally there's binary - // data embedded in the caption field for photos. This preserves the binary data at the - // expense of natural language collation (which we don't use anyway). 
$db->query("CREATE TABLE IF NOT EXISTS {exif_keys} ( `id` int(9) NOT NULL auto_increment, `item_id` int(9) NOT NULL, `name` varchar(64) NOT NULL, - `value` varbinary(1536) NOT NULL, + `value` varchar(1536) NOT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;"); $db->query("CREATE TABLE IF NOT EXISTS {exif_records} ( diff --git a/modules/tag/helpers/tag_event.php b/modules/tag/helpers/tag_event.php index a301c441..a4a6ec26 100644 --- a/modules/tag/helpers/tag_event.php +++ b/modules/tag/helpers/tag_event.php @@ -34,7 +34,10 @@ class tag_event_Core { if (!empty($iptc["2#025"])) { foreach($iptc["2#025"] as $tag) { $tag = str_replace("\0", "", $tag); - $tags[$tag]= 1; + if (mb_detect_encoding($tag) != "UTF-8") { + $tag = utf8_encode($tag); + } + $tags[$tag] = 1; } } } |