A port of phputf8 to a unified set of files. Provides multi-byte aware replacement string functions.
For UTF-8 support to work correctly, the following requirements must be met:
This file is licensed differently from the rest of Kohana. As a port of phputf8, this file is released under the LGPL.
Class declared in SYSPATH/classes/kohana/utf8.php on line 26.
array
$calledList of called methods that have had their required file included.
array(1) ( "transliterate_to_ascii" => bool TRUE )
boolean
$server_utf8Does the server support UTF-8 natively?
bool TRUE
Recursively cleans arrays, objects, and strings. Removes ASCII control codes and converts to the requested charset while silently discarding incompatible characters.
UTF8::clean($_GET); // Clean GET data
This method requires Iconv
mixed
$var
required - Variable to cleanstring
$charset
= NULL - Character set, defaults to Kohana::$charsetmixed
/**
 * Recursively cleans arrays, objects, and strings. Removes ASCII control
 * codes and converts to the requested charset while silently discarding
 * incompatible characters.
 *
 *     UTF8::clean($_GET); // Clean GET data
 *
 * Arrays are rebuilt, so a key that changes when cleaned replaces the
 * original entry. Objects are modified in place: array-like objects
 * (ArrayAccess) are written through array syntax, any other object has
 * its accessible properties cleaned. Values that are neither array,
 * object nor non-empty string are returned untouched.
 *
 * This method requires Iconv.
 *
 * @param   mixed   $var      variable to clean
 * @param   string  $charset  character set, defaults to Kohana::$charset
 * @return  mixed
 */
public static function clean($var, $charset = NULL)
{
    if ( ! $charset)
    {
        // Use the application character set
        $charset = Kohana::$charset;
    }

    if (is_array($var))
    {
        // Build a fresh array: writing cleaned keys back into $var would
        // leave the original (uncleaned) key and value behind whenever
        // cleaning changes the key
        $cleaned = array();

        foreach ($var as $key => $val)
        {
            // Recursion! The charset is passed down so nested data is
            // converted to the requested charset, not the default one
            $cleaned[self::clean($key, $charset)] = self::clean($val, $charset);
        }

        $var = $cleaned;
    }
    elseif (is_object($var))
    {
        if ($var instanceof ArrayAccess)
        {
            foreach ($var as $key => $val)
            {
                // Array-like object: array syntax is valid here
                $var[self::clean($key, $charset)] = self::clean($val, $charset);
            }
        }
        else
        {
            // Array syntax on a plain object is a fatal error, so clean the
            // property values instead. get_object_vars() takes a snapshot,
            // which keeps the loop independent of the writes below.
            foreach (get_object_vars($var) as $key => $val)
            {
                $var->$key = self::clean($val, $charset);
            }
        }
    }
    elseif (is_string($var) AND $var !== '')
    {
        // Remove control characters
        $var = self::strip_ascii_ctrl($var);

        if ( ! self::is_ascii($var))
        {
            // Mask notices only; every other reporting level stays as it was
            $error_reporting = error_reporting();
            error_reporting($error_reporting & ~E_NOTICE);

            // iconv is expensive, so it is only used when needed
            // NOTE(review): the result is assigned as-is, exactly as before;
            // confirm how callers should treat a FALSE result from iconv().
            $var = iconv($charset, $charset.'//IGNORE', $var);

            // Restore the previous reporting level
            error_reporting($error_reporting);
        }
    }

    return $var;
}
Takes an array of ints representing the Unicode characters and returns a UTF-8 string. Astral planes are supported i.e. the ints in the input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates are not allowed.
$str = UTF8::from_unicode($array);
The Original Code is Mozilla Communicator client code. The Initial Developer of the Original Code is Netscape Communications Corporation. Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer. Ported to PHP by Henri Sivonen hsivonen@iki.fi, see http://hsivonen.iki.fi/php-utf8/ Slight modifications to fit with phputf8 library by Harry Fuecks hfuecks@gmail.com.
array
$arr
required - $str unicode code points representing a stringstring
- Utf8 string of charactersboolean
/**
 * Takes an array of ints representing the Unicode characters and returns
 * a UTF-8 string.
 *
 *     $str = UTF8::from_unicode($array);
 *
 * The work is delegated to _from_unicode(), whose file is located with
 * Kohana::find_file('utf8', 'from_unicode') and loaded on first use.
 *
 * @param   array   $arr  unicode code points representing a string
 * @return  string  UTF-8 string of characters
 * @return  boolean FALSE if a code point cannot be found
 */
public static function from_unicode($arr)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _from_unicode($arr);
    }

    // First call: pull in the file that defines _from_unicode()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _from_unicode($arr);
}
Tests whether a string contains only 7-bit ASCII bytes. This is used to determine when to use native functions or UTF-8 functions.
$ascii = UTF8::is_ascii($str);
mixed
$str
required - String or array of strings to checkboolean
/**
 * Tests whether a string contains only 7-bit ASCII bytes. This is used to
 * determine when to use native functions or UTF-8 functions.
 *
 *     $ascii = UTF8::is_ascii($str);
 *
 * @param   mixed  $str  string or array of strings to check
 * @return  boolean
 */
public static function is_ascii($str)
{
    // An array is joined into one string so a single match covers all elements
    $subject = is_array($str) ? implode('', $str) : $str;

    // Look for any byte outside the \x00-\x7F range
    $high_byte_found = preg_match('/[^\x00-\x7F]/S', $subject);

    return $high_byte_found ? FALSE : TRUE;
}
Strips whitespace (or other UTF-8 characters) from the beginning of a string. This is a UTF8-aware version of ltrim.
$str = UTF8::ltrim($str);
string
$str
required - Input stringstring
$charlist
= NULL - String of characters to removestring
/**
 * Strips whitespace (or other UTF-8 characters) from the beginning of a
 * string. This is a UTF8-aware version of ltrim.
 *
 *     $str = UTF8::ltrim($str);
 *
 * The work is delegated to _ltrim(), whose file is located with
 * Kohana::find_file('utf8', 'ltrim') and loaded on first use.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  string
 */
public static function ltrim($str, $charlist = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _ltrim($str, $charlist);
    }

    // First call: pull in the file that defines _ltrim()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _ltrim($str, $charlist);
}
Returns the unicode ordinal for a character. This is a UTF8-aware version of ord.
$digit = UTF8::ord($character);
string
$chr
required - UTF-8 encoded characterinteger
/**
 * Returns the unicode ordinal for a character. This is a UTF8-aware
 * version of ord.
 *
 *     $digit = UTF8::ord($character);
 *
 * The work is delegated to _ord(), whose file is located with
 * Kohana::find_file('utf8', 'ord') and loaded on first use.
 *
 * @param   string  $chr  UTF-8 encoded character
 * @return  integer
 */
public static function ord($chr)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _ord($chr);
    }

    // First call: pull in the file that defines _ord()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _ord($chr);
}
Strips whitespace (or other UTF-8 characters) from the end of a string. This is a UTF8-aware version of rtrim.
$str = UTF8::rtrim($str);
string
$str
required - Input stringstring
$charlist
= NULL - String of characters to removestring
/**
 * Strips whitespace (or other UTF-8 characters) from the end of a string.
 * This is a UTF8-aware version of rtrim.
 *
 *     $str = UTF8::rtrim($str);
 *
 * The work is delegated to _rtrim(), whose file is located with
 * Kohana::find_file('utf8', 'rtrim') and loaded on first use.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  string
 */
public static function rtrim($str, $charlist = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _rtrim($str, $charlist);
    }

    // First call: pull in the file that defines _rtrim()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _rtrim($str, $charlist);
}
Returns a string or an array with all occurrences of search in subject (ignoring case) and replaced with the given replace value. This is a UTF8-aware version of str_ireplace.
This function is very slow compared to the native version. Avoid using it when possible.
string|array
$search
required - Text to replacestring|array
$replace
required - Replacement textstring|array
$str
required - Subject textbyref integer
$count
= NULL - Number of matched and replaced needles will be returned via this parameter, which is passed by reference
Pads a UTF-8 string to a certain length with another string. This is a UTF8-aware version of str_pad.
$str = UTF8::str_pad($str, $length);
string
$str
required - Input stringinteger
$final_str_length
required - Desired string length after paddingstring
$pad_str
= string(1) " " - String to use as paddingstring
$pad_type
= integer 1 - Padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTHstring
/**
 * Pads a UTF-8 string to a certain length with another string. This is a
 * UTF8-aware version of str_pad.
 *
 *     $str = UTF8::str_pad($str, $length);
 *
 * The work is delegated to _str_pad(), whose file is located with
 * Kohana::find_file('utf8', 'str_pad') and loaded on first use.
 *
 * @param   string   $str               input string
 * @param   integer  $final_str_length  desired string length after padding
 * @param   string   $pad_str           string to use as padding
 * @param   string   $pad_type          padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
 * @return  string
 */
public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _str_pad($str, $final_str_length, $pad_str, $pad_type);
    }

    // First call: pull in the file that defines _str_pad()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _str_pad($str, $final_str_length, $pad_str, $pad_type);
}
Converts a UTF-8 string to an array. This is a UTF8-aware version of str_split.
$array = UTF8::str_split($str);
string
$str
required - Input stringinteger
$split_length
= integer 1 - Maximum length of each chunkarray
/**
 * Converts a UTF-8 string to an array. This is a UTF8-aware version of
 * str_split.
 *
 *     $array = UTF8::str_split($str);
 *
 * The work is delegated to _str_split(), whose file is located with
 * Kohana::find_file('utf8', 'str_split') and loaded on first use.
 *
 * @param   string   $str           input string
 * @param   integer  $split_length  maximum length of each chunk
 * @return  array
 */
public static function str_split($str, $split_length = 1)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _str_split($str, $split_length);
    }

    // First call: pull in the file that defines _str_split()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _str_split($str, $split_length);
}
Case-insensitive UTF-8 string comparison. This is a UTF8-aware version of strcasecmp.
$compare = UTF8::strcasecmp($str1, $str2);
string
$str1
required - String to comparestring
$str2
required - String to compareinteger
- Less than 0 if str1 is less than str2integer
- Greater than 0 if str1 is greater than str2integer
/**
 * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
 * of strcasecmp.
 *
 *     $compare = UTF8::strcasecmp($str1, $str2);
 *
 * The work is delegated to _strcasecmp(), whose file is located with
 * Kohana::find_file('utf8', 'strcasecmp') and loaded on first use.
 *
 * @param   string   $str1  string to compare
 * @param   string   $str2  string to compare
 * @return  integer  less than 0 if str1 is less than str2
 * @return  integer  greater than 0 if str1 is greater than str2
 * @return  integer  0 if they are equal
 */
public static function strcasecmp($str1, $str2)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strcasecmp($str1, $str2);
    }

    // First call: pull in the file that defines _strcasecmp()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strcasecmp($str1, $str2);
}
Finds the length of the initial segment not matching mask. This is a UTF8-aware version of strcspn.
$found = UTF8::strcspn($str, $mask);
string
$str
required - Input stringstring
$mask
required - Mask for searchinteger
$offset
= NULL - Start position of the string to examineinteger
$length
= NULL - Length of the string to examineinteger
/**
 * Finds the length of the initial segment not matching mask. This is a
 * UTF8-aware version of strcspn.
 *
 *     $found = UTF8::strcspn($str, $mask);
 *
 * The work is delegated to _strcspn(), whose file is located with
 * Kohana::find_file('utf8', 'strcspn') and loaded on first use.
 *
 * @param   string   $str     input string
 * @param   string   $mask    mask for search
 * @param   integer  $offset  start position of the string to examine
 * @param   integer  $length  length of the string to examine
 * @return  integer  length of the initial segment that contains characters not in the mask
 */
public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strcspn($str, $mask, $offset, $length);
    }

    // First call: pull in the file that defines _strcspn()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strcspn($str, $mask, $offset, $length);
}
Strips out device control codes in the ASCII range.
$str = UTF8::strip_ascii_ctrl($str);
string
$str
required - String to cleanstring
/**
 * Strips out device control codes in the ASCII range.
 *
 *     $str = UTF8::strip_ascii_ctrl($str);
 *
 * Tab (\x09), line feed (\x0A) and carriage return (\x0D) are not part of
 * the pattern and are therefore kept.
 *
 * @param   string  $str  string to clean
 * @return  string
 */
public static function strip_ascii_ctrl($str)
{
    // Runs of control bytes, including DEL (\x7F)
    $control_codes = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

    $stripped = preg_replace($control_codes, '', $str);

    return $stripped;
}
Strips out all non-7bit ASCII bytes.
$str = UTF8::strip_non_ascii($str);
string
$str
required - String to cleanstring
/**
 * Strips out all non-7bit ASCII bytes.
 *
 *     $str = UTF8::strip_non_ascii($str);
 *
 * @param   string  $str  string to clean
 * @return  string
 */
public static function strip_non_ascii($str)
{
    // Runs of bytes above \x7F
    $high_bytes = '/[^\x00-\x7F]+/S';

    $ascii_only = preg_replace($high_bytes, '', $str);

    return $ascii_only;
}
Case-insensitive UTF-8 version of strstr. Returns all of input string from the first occurrence of needle to the end. This is a UTF8-aware version of stristr.
$found = UTF8::stristr($str, $search);
string
$str
required - Input stringstring
$search
required - Needlestring
- Matched substring if foundFALSE
/**
 * Case-insensitive UTF-8 version of strstr. Returns all of input string
 * from the first occurrence of needle to the end. This is a UTF8-aware
 * version of stristr.
 *
 *     $found = UTF8::stristr($str, $search);
 *
 * The work is delegated to _stristr(), whose file is located with
 * Kohana::find_file('utf8', 'stristr') and loaded on first use.
 *
 * @param   string  $str     input string
 * @param   string  $search  needle
 * @return  string  matched substring if found
 * @return  FALSE   if the substring was not found
 */
public static function stristr($str, $search)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _stristr($str, $search);
    }

    // First call: pull in the file that defines _stristr()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _stristr($str, $search);
}
Returns the length of the given string. This is a UTF8-aware version of strlen.
$length = UTF8::strlen($str);
string
$str
required - String being measured for lengthinteger
/**
 * Returns the length of the given string. This is a UTF8-aware version
 * of strlen.
 *
 *     $length = UTF8::strlen($str);
 *
 * When UTF8::$server_utf8 is set, mb_strlen() is used with
 * Kohana::$charset. Otherwise the work is delegated to _strlen(), whose
 * file is located with Kohana::find_file('utf8', 'strlen') and loaded on
 * first use.
 *
 * @param   string  $str  string being measured for length
 * @return  integer
 */
public static function strlen($str)
{
    if (UTF8::$server_utf8)
    {
        // Native multibyte support is available
        return mb_strlen($str, Kohana::$charset);
    }

    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strlen($str);
    }

    // First call: pull in the file that defines _strlen()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strlen($str);
}
Finds position of first occurrence of a UTF-8 string. This is a UTF8-aware version of strpos.
$position = UTF8::strpos($str, $search);
string
$str
required - Haystackstring
$search
required - Needleinteger
$offset
= integer 0 - Offset from which character in haystack to start searchinginteger
- Position of needleboolean
/**
 * Finds position of first occurrence of a UTF-8 string. This is a
 * UTF8-aware version of strpos.
 *
 *     $position = UTF8::strpos($str, $search);
 *
 * When UTF8::$server_utf8 is set, mb_strpos() is used with
 * Kohana::$charset. Otherwise the work is delegated to _strpos(), whose
 * file is located with Kohana::find_file('utf8', 'strpos') and loaded on
 * first use.
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which character in haystack to start searching
 * @return  integer  position of needle
 * @return  boolean  FALSE if the needle is not found
 */
public static function strpos($str, $search, $offset = 0)
{
    if (UTF8::$server_utf8)
    {
        // Native multibyte support is available
        return mb_strpos($str, $search, $offset, Kohana::$charset);
    }

    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strpos($str, $search, $offset);
    }

    // First call: pull in the file that defines _strpos()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strpos($str, $search, $offset);
}
Reverses a UTF-8 string. This is a UTF8-aware version of strrev.
$str = UTF8::strrev($str);
string
$str
required - String to be reversedstring
/**
 * Reverses a UTF-8 string. This is a UTF8-aware version of strrev.
 *
 *     $str = UTF8::strrev($str);
 *
 * The work is delegated to _strrev(), whose file is located with
 * Kohana::find_file('utf8', 'strrev') and loaded on first use.
 *
 * @param   string  $str  string to be reversed
 * @return  string
 */
public static function strrev($str)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strrev($str);
    }

    // First call: pull in the file that defines _strrev()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strrev($str);
}
Finds position of last occurrence of a char in a UTF-8 string. This is a UTF8-aware version of strrpos.
$position = UTF8::strrpos($str, $search);
string
$str
required - Haystackstring
$search
required - Needleinteger
$offset
= integer 0 - Offset from which character in haystack to start searchinginteger
- Position of needleboolean
/**
 * Finds position of last occurrence of a char in a UTF-8 string. This is
 * a UTF8-aware version of strrpos.
 *
 *     $position = UTF8::strrpos($str, $search);
 *
 * When UTF8::$server_utf8 is set, mb_strrpos() is used with
 * Kohana::$charset. Otherwise the work is delegated to _strrpos(), whose
 * file is located with Kohana::find_file('utf8', 'strrpos') and loaded on
 * first use.
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which character in haystack to start searching
 * @return  integer  position of needle
 * @return  boolean  FALSE if the needle is not found
 */
public static function strrpos($str, $search, $offset = 0)
{
    if (UTF8::$server_utf8)
    {
        // Native multibyte support is available
        return mb_strrpos($str, $search, $offset, Kohana::$charset);
    }

    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strrpos($str, $search, $offset);
    }

    // First call: pull in the file that defines _strrpos()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strrpos($str, $search, $offset);
}
Finds the length of the initial segment matching mask. This is a UTF8-aware version of strspn.
$found = UTF8::strspn($str, $mask);
string
$str
required - Input stringstring
$mask
required - Mask for searchinteger
$offset
= NULL - Start position of the string to examineinteger
$length
= NULL - Length of the string to examineinteger
/**
 * Finds the length of the initial segment matching mask. This is a
 * UTF8-aware version of strspn.
 *
 *     $found = UTF8::strspn($str, $mask);
 *
 * The work is delegated to _strspn(), whose file is located with
 * Kohana::find_file('utf8', 'strspn') and loaded on first use.
 *
 * @param   string   $str     input string
 * @param   string   $mask    mask for search
 * @param   integer  $offset  start position of the string to examine
 * @param   integer  $length  length of the string to examine
 * @return  integer  length of the initial segment that contains characters in the mask
 */
public static function strspn($str, $mask, $offset = NULL, $length = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strspn($str, $mask, $offset, $length);
    }

    // First call: pull in the file that defines _strspn()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strspn($str, $mask, $offset, $length);
}
Makes a UTF-8 string lowercase. This is a UTF8-aware version of strtolower.
$str = UTF8::strtolower($str);
string
$str
required - Mixed case stringstring
/**
 * Makes a UTF-8 string lowercase. This is a UTF8-aware version of
 * strtolower.
 *
 *     $str = UTF8::strtolower($str);
 *
 * When UTF8::$server_utf8 is set, mb_strtolower() is used with
 * Kohana::$charset. Otherwise the work is delegated to _strtolower(),
 * whose file is located with Kohana::find_file('utf8', 'strtolower') and
 * loaded on first use.
 *
 * @param   string  $str  mixed case string
 * @return  string
 */
public static function strtolower($str)
{
    if (UTF8::$server_utf8)
    {
        // Native multibyte support is available
        return mb_strtolower($str, Kohana::$charset);
    }

    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strtolower($str);
    }

    // First call: pull in the file that defines _strtolower()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strtolower($str);
}
Makes a UTF-8 string uppercase. This is a UTF8-aware version of strtoupper.
string
$str
required - Mixed case stringstring
/**
 * Makes a UTF-8 string uppercase. This is a UTF8-aware version of
 * strtoupper.
 *
 *     $str = UTF8::strtoupper($str);
 *
 * When UTF8::$server_utf8 is set, mb_strtoupper() is used with
 * Kohana::$charset. Otherwise the work is delegated to _strtoupper(),
 * whose file is located with Kohana::find_file('utf8', 'strtoupper') and
 * loaded on first use.
 *
 * @param   string  $str  mixed case string
 * @return  string
 */
public static function strtoupper($str)
{
    if (UTF8::$server_utf8)
    {
        // Native multibyte support is available
        return mb_strtoupper($str, Kohana::$charset);
    }

    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _strtoupper($str);
    }

    // First call: pull in the file that defines _strtoupper()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _strtoupper($str);
}
Returns part of a UTF-8 string. This is a UTF8-aware version of substr.
$sub = UTF8::substr($str, $offset);
string
$str
required - Input stringinteger
$offset
required - Offsetinteger
$length
= NULL - Length limitstring
/**
 * Returns part of a UTF-8 string. This is a UTF8-aware version of substr.
 *
 *     $sub = UTF8::substr($str, $offset);
 *
 * When UTF8::$server_utf8 is set, mb_substr() is used with
 * Kohana::$charset. Otherwise the work is delegated to _substr(), whose
 * file is located with Kohana::find_file('utf8', 'substr') and loaded on
 * first use.
 *
 * @param   string   $str     input string
 * @param   integer  $offset  offset
 * @param   integer  $length  length limit; NULL means "to the end of the string"
 * @return  string
 */
public static function substr($str, $offset, $length = NULL)
{
    if (UTF8::$server_utf8)
    {
        if ($length === NULL)
        {
            // Measure the string in the same charset that mb_substr() uses
            // below. Without the charset argument mb_strlen() falls back to
            // the mbstring internal encoding, which can yield a smaller
            // count and cut the result short.
            $length = mb_strlen($str, Kohana::$charset);
        }

        return mb_substr($str, $offset, $length, Kohana::$charset);
    }

    if ( ! isset(self::$called[__FUNCTION__]))
    {
        // First call: pull in the file that defines _substr()
        require Kohana::find_file('utf8', __FUNCTION__);

        // Function has been called
        self::$called[__FUNCTION__] = TRUE;
    }

    // The original $length (possibly NULL) is forwarded unchanged
    return _substr($str, $offset, $length);
}
Replaces text within a portion of a UTF-8 string. This is a UTF8-aware version of substr_replace.
$str = UTF8::substr_replace($str, $replacement, $offset);
string
$str
required - Input stringstring
$replacement
required - Replacement stringinteger
$offset
required - Offsetunknown
$length
= NULLstring
/**
 * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
 * version of substr_replace.
 *
 *     $str = UTF8::substr_replace($str, $replacement, $offset);
 *
 * The work is delegated to _substr_replace(), whose file is located with
 * Kohana::find_file('utf8', 'substr_replace') and loaded on first use.
 *
 * @param   string   $str          input string
 * @param   string   $replacement  replacement string
 * @param   integer  $offset       offset
 * @param   mixed    $length       type not documented; forwarded unchanged
 * @return  string
 */
public static function substr_replace($str, $replacement, $offset, $length = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _substr_replace($str, $replacement, $offset, $length);
    }

    // First call: pull in the file that defines _substr_replace()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _substr_replace($str, $replacement, $offset, $length);
}
Takes an UTF-8 string and returns an array of ints representing the Unicode characters. Astral planes are supported i.e. the ints in the output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates are not allowed.
$array = UTF8::to_unicode($str);
The Original Code is Mozilla Communicator client code. The Initial Developer of the Original Code is Netscape Communications Corporation. Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer. Ported to PHP by Henri Sivonen hsivonen@iki.fi, see http://hsivonen.iki.fi/php-utf8/ Slight modifications to fit with phputf8 library by Harry Fuecks hfuecks@gmail.com
string
$str
required - UTF-8 encoded stringarray
- Unicode code pointsFALSE
/**
 * Takes an UTF-8 string and returns an array of ints representing the
 * Unicode characters.
 *
 *     $array = UTF8::to_unicode($str);
 *
 * The work is delegated to _to_unicode(), whose file is located with
 * Kohana::find_file('utf8', 'to_unicode') and loaded on first use.
 *
 * @param   string  $str  UTF-8 encoded string
 * @return  array   unicode code points
 * @return  FALSE   if the string is invalid
 */
public static function to_unicode($str)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _to_unicode($str);
    }

    // First call: pull in the file that defines _to_unicode()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _to_unicode($str);
}
Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
$ascii = UTF8::transliterate_to_ascii($utf8);
string
$str
required - String to transliterateinteger
$case
= integer 0 - -1 lowercase only, +1 uppercase only, 0 both casesstring
/**
 * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
 *
 *     $ascii = UTF8::transliterate_to_ascii($utf8);
 *
 * The work is delegated to _transliterate_to_ascii(), whose file is
 * located with Kohana::find_file('utf8', 'transliterate_to_ascii') and
 * loaded on first use.
 *
 * @param   string   $str   string to transliterate
 * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases
 * @return  string
 */
public static function transliterate_to_ascii($str, $case = 0)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _transliterate_to_ascii($str, $case);
    }

    // First call: pull in the file that defines _transliterate_to_ascii()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _transliterate_to_ascii($str, $case);
}
Strips whitespace (or other UTF-8 characters) from the beginning and end of a string. This is a UTF8-aware version of trim.
$str = UTF8::trim($str);
string
$str
required - Input stringstring
$charlist
= NULL - String of characters to removestring
/**
 * Strips whitespace (or other UTF-8 characters) from the beginning and
 * end of a string. This is a UTF8-aware version of trim.
 *
 *     $str = UTF8::trim($str);
 *
 * The work is delegated to _trim(), whose file is located with
 * Kohana::find_file('utf8', 'trim') and loaded on first use.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  string
 */
public static function trim($str, $charlist = NULL)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _trim($str, $charlist);
    }

    // First call: pull in the file that defines _trim()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _trim($str, $charlist);
}
Makes a UTF-8 string's first character uppercase. This is a UTF8-aware version of ucfirst.
$str = UTF8::ucfirst($str);
string
$str
required - Mixed case stringstring
/**
 * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
 * version of ucfirst.
 *
 *     $str = UTF8::ucfirst($str);
 *
 * The work is delegated to _ucfirst(), whose file is located with
 * Kohana::find_file('utf8', 'ucfirst') and loaded on first use.
 *
 * @param   string  $str  mixed case string
 * @return  string
 */
public static function ucfirst($str)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _ucfirst($str);
    }

    // First call: pull in the file that defines _ucfirst()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _ucfirst($str);
}
Makes the first character of every word in a UTF-8 string uppercase. This is a UTF8-aware version of ucwords.
$str = UTF8::ucwords($str);
string
$str
required - Mixed case stringstring
/**
 * Makes the first character of every word in a UTF-8 string uppercase.
 * This is a UTF8-aware version of ucwords.
 *
 *     $str = UTF8::ucwords($str);
 *
 * The work is delegated to _ucwords(), whose file is located with
 * Kohana::find_file('utf8', 'ucwords') and loaded on first use.
 *
 * @param   string  $str  mixed case string
 * @return  string
 */
public static function ucwords($str)
{
    if (isset(self::$called[__FUNCTION__]))
    {
        // Implementation already loaded by an earlier call
        return _ucwords($str);
    }

    // First call: pull in the file that defines _ucwords()
    require Kohana::find_file('utf8', __FUNCTION__);

    // Remember that the file has been included
    self::$called[__FUNCTION__] = TRUE;

    return _ucwords($str);
}