From 020281d932c566476222e6c825ada3affff239a6 Mon Sep 17 00:00:00 2001 From: Andy Staudacher Date: Sat, 29 Aug 2009 10:45:47 -0700 Subject: Adding SafeString which is going to replace p::clean() and p::purify(). Refactoring of Xss_Security_Test. t() and t2() return a SafeString instance. TODO: - Update all code to use SafeString where appropriate. - Update golden fole of Xss_Security_Test - Stop reporting CLEAN vars in Xss_Security_Test --- modules/gallery/helpers/p.php | 16 +- modules/gallery/libraries/I18n.php | 20 +- modules/gallery/libraries/MY_ORM.php | 4 + modules/gallery/libraries/SafeString.php | 142 ++++++++++++ modules/gallery/tests/SafeString_Test.php | 111 ++++++++++ modules/gallery/tests/Xss_Security_Test.php | 325 ++++++++++++++++++++++------ modules/user/views/login.html.php | 6 +- 7 files changed, 535 insertions(+), 89 deletions(-) create mode 100644 modules/gallery/libraries/SafeString.php create mode 100644 modules/gallery/tests/SafeString_Test.php (limited to 'modules') diff --git a/modules/gallery/helpers/p.php b/modules/gallery/helpers/p.php index 862c769b..e852c086 100644 --- a/modules/gallery/helpers/p.php +++ b/modules/gallery/helpers/p.php @@ -18,22 +18,12 @@ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ class p_Core { - private static $_purifier = null; static function clean($dirty_html) { - return html::specialchars($dirty_html); + return new SafeString($dirty_html); } + // Deprecated: Please use p::clean($var).purified_html() static function purify($dirty_html) { - if (empty(self::$_purifier)) { - require_once(dirname(__file__) . "/../lib/HTMLPurifier/HTMLPurifier.auto.php"); - $config = HTMLPurifier_Config::createDefault(); - foreach (Kohana::config('purifier') as $category => $key_value) { - foreach ($key_value as $key => $value) { - $config->set("$category.$key", $value); - } - } - self::$_purifier = new HTMLPurifier($config); - } - return self::$_purifier->purify($dirty_html); + return SafeString::of($dirty_html)->purified_html(); } } diff --git a/modules/gallery/libraries/I18n.php b/modules/gallery/libraries/I18n.php index 03a6d8f6..8dc42e04 100644 --- a/modules/gallery/libraries/I18n.php +++ b/modules/gallery/libraries/I18n.php @@ -84,6 +84,12 @@ class I18n_Core { /** * Translates a localizable message. + * + * Security: + * The returned string is safe for use in HTML (it contains a safe subset of HTML and + * interpolation parameters are converted to HTML entities). + * For use in JavaScript, please call ->for_js() on it. + * * @param $message String|array The message to be translated. E.g. "Hello world" * or array("one" => "One album", "other" => "%count albums") * @param $options array (optional) Options array for key value pairs which are used @@ -110,7 +116,7 @@ class I18n_Core { $entry = $this->interpolate($locale, $entry, $values); - return $entry; + return SafeString::of($entry)->mark_html_safe(); } private function lookup($locale, $message) { @@ -179,17 +185,19 @@ class I18n_Core { return is_array($message); } - private function interpolate($locale, $string, $values) { + private function interpolate($locale, $string, $key_values) { // TODO: Handle locale specific number formatting. // Replace x_y before replacing x. - krsort($values, SORT_STRING); + krsort($key_values, SORT_STRING); $keys = array(); - foreach (array_keys($values) as $key) { + $values = array(); + foreach ($key_values as $key => $value) { $keys[] = "%$key"; + $values[] = new SafeString($value); } - return str_replace($keys, array_values($values), $string); + return str_replace($keys, $values, $string); } private function pluralize($locale, $entry, $count) { @@ -414,4 +422,4 @@ class I18n_Core { return $count == 1 ? 'one' : 'other'; } } -} \ No newline at end of file +} diff --git a/modules/gallery/libraries/MY_ORM.php b/modules/gallery/libraries/MY_ORM.php index de8adc1d..2c9ad1d7 100644 --- a/modules/gallery/libraries/MY_ORM.php +++ b/modules/gallery/libraries/MY_ORM.php @@ -43,6 +43,10 @@ class ORM extends ORM_Core { $this->original = clone $this; } + if ($value instanceof SafeString) { + $value = $value->unescaped(); + } + return parent::__set($column, $value); } diff --git a/modules/gallery/libraries/SafeString.php b/modules/gallery/libraries/SafeString.php new file mode 100644 index 00000000..53bcb27a --- /dev/null +++ b/modules/gallery/libraries/SafeString.php @@ -0,0 +1,142 @@ +_is_safe_html = $string->_is_safe_html; + $string = $string->unescaped(); + } + $this->_raw_string = (string) $string; + } + + /** + * Factory method returning a new SafeString instance for the given string. + */ + static function of($string) { + return new SafeString($string); + } + + /** + * Marks this string as safe to be used in HTML without any escaping. + */ + function mark_html_safe() { + $this->_is_safe_html = true; + return $this; + } + + /** + * Safe for use in HTML. + * @see #for_html() + */ + function __toString() { + if ($this->_is_safe_html) { + return $this->_raw_string; + } else { + return self::_escape_for_html($this->_raw_string); + } + } + + /** + * Safe for use in HTML. + * + * Example:
+   *   
+ *
+ * @return the string escaped for use in HTML. + */ + function for_html() { + return $this; + } + + /** + * Safe for use in JavaScript. + * + * Example:
+   *    block?
+	if (is_array($token) && $token[0] == T_INLINE_HTML) {
+	  $inline_html = $token[1];
+	  // T_INLINE_HTML blocks can be split. Need to handle the case
+	  // where one token has "expr_append($inline_html);
+	  }
+
+	  // Note: This approach won't catch }i', $inline_html, $matches, PREG_OFFSET_CAPTURE)) {
+	    $last_match = array_pop($matches[0]);
+	    if (is_array($last_match)) {
+	      $closing_script_pos = $last_match[1];
+	    } else {
+	      $closing_script_pos = $last_match;
+	    }
+	  }
+	  if (preg_match('{]*>}i', $inline_html, $matches, PREG_OFFSET_CAPTURE)) {
+	    $last_match = array_pop($matches[0]);
+	    if (is_array($last_match)) {
+	      $opening_script_pos = $last_match[1];
+	    } else {
+	      $opening_script_pos = $last_match;
+	    }
+	  }
+	  if ($opening_script_pos != $closing_script_pos) {
+	    $in_script_block = $opening_script_pos > $closing_script_pos;
+	  }
+	}
+
+	// Look and report each instance of < ? = ... ? >
+	if (!is_array($token)) {
+	  // A single char token, e.g: ; ( )
+	  if ($frame) {
+	    $frame->expr_append($token);
+	  }
+	} else if ($token[0] == T_OPEN_TAG_WITH_ECHO) {
+	  // No need for a stack here - assume < ? = cannot be nested.
+	  $frame = self::_create_frame($token, $in_script_block);
+        } else if ($frame && $token[0] == T_CLOSE_TAG) {
+	  // Store the < ? = ... ? > block that just ended here.
+	  $found[$view][] = $frame;
+	  $frame = null;
+        } else if ($frame && $token[0] == T_VARIABLE) {
+	  $frame->expr_append($token[1]);
+	} else if ($frame && $token[0] == T_STRING) {
+	  $frame->expr_append($token[1]);
+	  // t() and t2() are special in that they're guaranteed to return a SafeString().
+	  if (in_array($token[1], array("t", "t2"))) {
+	    if (self::_token_matches("(", $tokens, $token_number + 1)) {
+	      $frame->is_safestring(true);
+	      $frame->expr_append("(");
+
+	      $token_number++;
+	      $token = $tokens[$token_number];
+	    }
+	  } else if ($token[1] == "SafeString") {
+	    // Looking for SafeString::of(...
+	    if (self::_token_matches(array(T_DOUBLE_COLON, "::"), $tokens, $token_number + 1) &&
+		self::_token_matches(array(T_STRING, "of"), $tokens, $token_number + 2)	&&
+		self::_token_matches("(", $tokens, $token_number + 3)) {
+	      $frame->is_safestring(true);
+	      $frame->expr_append("::of(");
+
+	      $token_number += 3;
+	      $token = $tokens[$token_number];
+	    }
+	  } else if ($token[1] == "json_encode") {
+	    if (self::_token_matches("(", $tokens, $token_number + 1)) {
+	      $frame->json_encode_called(true);
+	      $frame->expr_append("(");
+
+	      $token_number++;
+	      $token = $tokens[$token_number];
+	    }
+	  }
+	} else if ($frame && $token[0] == T_OBJECT_OPERATOR) {
+	  $frame->expr_append($token[1]);
+
+	  if (self::_token_matches(array(T_STRING), $tokens, $token_number + 1) &&
+	      in_array($tokens[$token_number + 1][1],
+		       array("for_js", "for_html", "purified_html")) &&
+	      self::_token_matches("(", $tokens, $token_number + 2)) {
+
+	    $method = $tokens[$token_number + 1][1];
+	    $frame->expr_append("$method(");
+
+	    $token_number += 2;
+	    $token = $tokens[$token_number];
+
+	    if ("for_js" == $method) {
+	      $frame->for_js_called(true);
+	    } else if ("for_html" == $method) {
+	      $frame->for_html_called(true);
+	    } else if ("purified_html" == $method) {
+	      $frame->purified_html_called(true);
+	    }
+	  }
+        } else if ($frame) {
+	  $frame->expr_append($token[1]);
+	}
       }
     }
 
-    $canonical = MODPATH . "gallery/tests/xss_data.txt";
+    // Generate the report.
+    /*
+     * States for uses of < ? = X ? >:
+     * JS_XSS:
+     *   In