mirror of
https://github.com/torrentpier/torrentpier-lts.git
synced 2025-03-01 15:21:02 +03:00
387 lines
12 KiB
PHP
387 lines
12 KiB
PHP
<?php
|
|
/**
|
|
* Zend Framework (http://framework.zend.com/)
|
|
*
|
|
* @link http://github.com/zendframework/zf2 for the canonical source repository
|
|
* @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
|
|
* @license http://framework.zend.com/license/new-bsd New BSD License
|
|
*/
|
|
|
|
namespace Zend\Escaper;
|
|
|
|
/**
 * Context specific methods for use in secure output escaping.
 *
 * Each public escape*() method targets exactly one output context (HTML body,
 * HTML attribute, Javascript, URL component, CSS). Apart from escapeHtml() and
 * escapeUrl(), which delegate directly to PHP built-ins, input is converted
 * to UTF-8, escaped character by character, and converted back to the
 * encoding configured in the constructor.
 */
class Escaper
{
    /**
     * Entity Map mapping Unicode codepoints to any available named HTML entities.
     *
     * While HTML supports far more named entities, the lowest common denominator
     * has become HTML5's XML Serialisation which is restricted to those named
     * entities that XML supports. Using HTML entities would result in this error:
     *     XML Parsing Error: undefined entity
     *
     * @var array
     */
    protected static $htmlNamedEntityMap = array(
        34 => 'quot', // quotation mark
        38 => 'amp',  // ampersand
        60 => 'lt',   // less-than sign
        62 => 'gt',   // greater-than sign
    );

    /**
     * Current encoding for escaping. If not UTF-8, we convert strings from this encoding
     * pre-escaping and back to this encoding post-escaping.
     *
     * @var string
     */
    protected $encoding = 'utf-8';

    /**
     * Holds the value of the special flags passed as second parameter to
     * htmlspecialchars(). We modify these for PHP 5.4 to take advantage
     * of the new ENT_SUBSTITUTE flag for correctly dealing with invalid
     * UTF-8 sequences.
     *
     * @var int
     */
    protected $htmlSpecialCharsFlags = ENT_QUOTES;

    /**
     * Matcher callback which escapes characters for HTML Attribute contexts
     *
     * @var callable
     */
    protected $htmlAttrMatcher;

    /**
     * Matcher callback which escapes characters for Javascript contexts
     *
     * @var callable
     */
    protected $jsMatcher;

    /**
     * Matcher callback which escapes characters for CSS Attribute contexts
     *
     * @var callable
     */
    protected $cssMatcher;

    /**
     * List of all encodings supported by this class (lower-case)
     *
     * @var array
     */
    protected $supportedEncodings = array(
        'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
        'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
        'ibm866',       '866',          'cp1251',       'windows-1251',
        'win-1251',     '1251',         'cp1252',       'windows-1252',
        '1252',         'koi8-r',       'koi8-ru',      'koi8r',
        'big5',         '950',          'gb2312',       '936',
        'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
        'cp932',        '932',          'euc-jp',       'eucjp',
        'eucjp-win',    'macroman'
    );

    /**
     * Constructor: Single parameter allows setting of global encoding for use by
     * the current object. If PHP 5.4 is detected, additional ENT_SUBSTITUTE flag
     * is set for htmlspecialchars() calls.
     *
     * @param string $encoding Case-insensitive encoding name; null keeps the UTF-8 default
     * @throws Exception\InvalidArgumentException If the encoding is blank or not supported
     */
    public function __construct($encoding = null)
    {
        if ($encoding !== null) {
            $encoding = (string) $encoding;
            if ($encoding === '') {
                throw new Exception\InvalidArgumentException(
                    get_class($this) . ' constructor parameter does not allow a blank value'
                );
            }

            $encoding = strtolower($encoding);
            // Strict comparison is required: with loose comparison PHP compares
            // numeric strings numerically, so e.g. '0866' or ' 866' would be
            // accepted as '866' and later handed to htmlspecialchars() verbatim.
            if (!in_array($encoding, $this->supportedEncodings, true)) {
                throw new Exception\InvalidArgumentException(
                    'Value of \'' . $encoding . '\' passed to ' . get_class($this)
                    . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                );
            }

            $this->encoding = $encoding;
        }

        if (defined('ENT_SUBSTITUTE')) {
            $this->htmlSpecialCharsFlags |= ENT_SUBSTITUTE;
        }

        // set matcher callbacks
        $this->htmlAttrMatcher = array($this, 'htmlAttrMatcher');
        $this->jsMatcher       = array($this, 'jsMatcher');
        $this->cssMatcher      = array($this, 'cssMatcher');
    }

    /**
     * Return the encoding that all output/input is expected to be encoded in.
     *
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * Escape a string for the HTML Body context where there are very few characters
     * of special meaning. Internally this will use htmlspecialchars().
     *
     * @param string $string
     * @return string
     */
    public function escapeHtml($string)
    {
        return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
    }

    /**
     * Escape a string for the HTML Attribute context. We use an extended set of characters
     * to escape that are not covered by htmlspecialchars() to cover cases where an attribute
     * might be unquoted or quoted illegally (e.g. backticks are valid quotes for IE).
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException If the input is not valid in the configured encoding
     * @return string
     */
    public function escapeHtmlAttr($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the Javascript context. This does not use json_encode(). An extended
     * set of characters are escaped beyond ECMAScript's rules for Javascript literal string
     * escaping in order to prevent misinterpretation of Javascript as HTML leading to the
     * injection of special characters and entities. The escaping used should be tolerant
     * of cases where HTML escaping was not applied on top of Javascript escaping correctly.
     * Backslash escaping is not used as it still leaves the escaped character as-is and so
     * is not useful in a HTML context.
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException If the input is not valid in the configured encoding
     * @return string
     */
    public function escapeJs($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Escape a string for the URI or Parameter contexts. This should not be used to escape
     * an entire URI - only a subcomponent being inserted. The function is a simple proxy
     * to rawurlencode() which now implements RFC 3986 since PHP 5.3 completely.
     *
     * @param string $string
     * @return string
     */
    public function escapeUrl($string)
    {
        return rawurlencode($string);
    }

    /**
     * Escape a string for the CSS context. CSS escaping can be applied to any string being
     * inserted into CSS and escapes everything except alphanumerics.
     *
     * Empty strings and strings consisting only of digits are returned unchanged.
     *
     * @param string $string
     * @throws Exception\RuntimeException If the input is not valid in the configured encoding
     * @return string
     */
    public function escapeCss($string)
    {
        $string = $this->toUtf8($string);
        if ($string === '' || ctype_digit($string)) {
            return $string;
        }

        $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
        return $this->fromUtf8($result);
    }

    /**
     * Callback function for preg_replace_callback that applies HTML Attribute
     * escaping to all matches.
     *
     * @param array $matches Match array; element 0 is a single UTF-8 character
     * @return string Named entity, upper-case hex entity, or the replacement-character entity
     */
    protected function htmlAttrMatcher($matches)
    {
        $chr = $matches[0];

        /**
         * Resolve the Unicode code point first. Multi-byte characters go through
         * UTF-32BE so that the big-endian bytes are the code point itself, also
         * for characters above U+FFFF (UTF-16 would yield a surrogate pair).
         */
        if (strlen($chr) > 1) {
            $ord = hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
        } else {
            $ord = ord($chr);
        }

        /**
         * The following replaces characters undefined in HTML with the
         * hex entity for the Unicode replacement character. The check is made
         * on the code point (not the first byte) so that the C1 controls
         * U+0080..U+009F, which are two bytes long in UTF-8, are caught too.
         */
        if (($ord <= 0x1f && $chr !== "\t" && $chr !== "\n" && $chr !== "\r")
            || ($ord >= 0x7f && $ord <= 0x9f)
        ) {
            return '&#xFFFD;';
        }

        /**
         * Check if the current character to escape has a name entity we should
         * replace it with.
         */
        if (isset(static::$htmlNamedEntityMap[$ord])) {
            return '&' . static::$htmlNamedEntityMap[$ord] . ';';
        }

        /**
         * Per OWASP recommendations, we'll use upper hex entities
         * for any other characters where a named entity does not exist.
         */
        if ($ord > 255) {
            return sprintf('&#x%04X;', $ord);
        }
        return sprintf('&#x%02X;', $ord);
    }

    /**
     * Callback function for preg_replace_callback that applies Javascript
     * escaping to all matches.
     *
     * @param array $matches Match array; element 0 is a single UTF-8 character
     * @return string \xHH for single-byte characters, otherwise one or two \uHHHH escapes
     */
    protected function jsMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            return sprintf('\\x%02X', ord($chr));
        }

        $hex = strtoupper(bin2hex($this->convertEncoding($chr, 'UTF-16BE', 'UTF-8')));
        if (strlen($hex) <= 4) {
            return sprintf('\\u%04s', $hex);
        }

        // Characters above U+FFFF are a UTF-16 surrogate pair (two 16-bit units);
        // Javascript needs each unit written as its own \uHHHH escape.
        return sprintf('\\u%04s\\u%04s', substr($hex, 0, 4), substr($hex, 4, 4));
    }

    /**
     * Callback function for preg_replace_callback that applies CSS
     * escaping to all matches.
     *
     * @param array $matches Match array; element 0 is a single UTF-8 character
     * @return string Backslash, upper-case hex code point, and a terminating space
     */
    protected function cssMatcher($matches)
    {
        $chr = $matches[0];
        if (strlen($chr) === 1) {
            $ord = ord($chr);
        } else {
            // UTF-32BE bytes are the code point itself, so this stays correct
            // for characters above U+FFFF (UTF-16 would give a surrogate pair).
            $chr = $this->convertEncoding($chr, 'UTF-32BE', 'UTF-8');
            $ord = hexdec(bin2hex($chr));
        }
        return sprintf('\\%X ', $ord);
    }

    /**
     * Converts a string to UTF-8 from the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * Note that convertEncoding() returns an empty string when the conversion
     * itself fails, and an empty string is accepted here as valid.
     *
     * @param string $string
     * @throws Exception\RuntimeException If the resulting string is not valid UTF-8
     * @return string
     */
    protected function toUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            $result = $string;
        } else {
            $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
        }

        if (!$this->isUtf8($result)) {
            throw new Exception\RuntimeException(
                sprintf('String to be escaped was not valid UTF-8 or could not be converted: %s', $result)
            );
        }

        return $result;
    }

    /**
     * Converts a string from UTF-8 to the base encoding. The base encoding is set via this
     * class' constructor.
     *
     * @param string $string
     * @return string
     */
    protected function fromUtf8($string)
    {
        if ($this->getEncoding() === 'utf-8') {
            return $string;
        }

        return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
    }

    /**
     * Checks if a given string appears to be valid UTF-8 or not.
     *
     * Relies on PCRE's "u" modifier: preg_match() does not match when the
     * subject is not valid UTF-8.
     *
     * @param string $string
     * @return bool
     */
    protected function isUtf8($string)
    {
        return ($string === '' || preg_match('/^./su', $string));
    }

    /**
     * Encoding conversion helper which wraps iconv and mbstring where they exist or throws
     * an exception where neither is available.
     *
     * @param string $string String to convert
     * @param string $to Target encoding
     * @param array|string $from Source encoding
     * @throws Exception\RuntimeException If neither iconv nor mbstring is available
     * @return string Converted string, or '' when the conversion fails
     */
    protected function convertEncoding($string, $to, $from)
    {
        if (function_exists('iconv')) {
            $result = iconv($from, $to, $string);
        } elseif (function_exists('mb_convert_encoding')) {
            $result = mb_convert_encoding($string, $to, $from);
        } else {
            throw new Exception\RuntimeException(
                get_class($this)
                . ' requires either the iconv or mbstring extension to be installed'
                . ' when escaping for non UTF-8 strings.'
            );
        }

        if ($result === false) {
            return ''; // return non-fatal blank string on encoding errors from users
        }
        return $result;
    }
}
|