Kohana_UTF8
A port of phputf8 to a unified set
of files. Provides multi-byte aware replacement string functions.
For UTF-8 support to work correctly, the following requirements must be met:
- PCRE needs to be compiled with UTF-8 support (--enable-utf8)
- Support for Unicode properties
is highly recommended (--enable-unicode-properties)
- UTF-8 conversion will be much more reliable if the
iconv extension is loaded
- The mbstring extension is highly recommended,
but must not be overloading string functions
This file is licensed differently from the rest of Kohana. As a port of
phputf8, this file is released under the LGPL.
- package
- Kohana
- category
- Base
- author
- Kohana Team
- copyright
- © 2007-2012 Kohana Team
- © 2005 Harry Fuecks
- license
- http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
Class declared in SYSPATH/classes/kohana/utf8.php on line 26.
public static array
$called
link to this
List of called methods that have had their required file included.
array(1) (
"transliterate_to_ascii" => bool TRUE
)
public static boolean
$server_utf8
link to this
Does the server support UTF-8 natively?
bool TRUE
public static clean( mixed $var [, string $charset = NULL ] )
(defined in Kohana_UTF8)
link to this
Recursively cleans arrays, objects, and strings. Removes ASCII control
codes and converts to the requested charset while silently discarding
incompatible characters.
This method requires Iconv
Parameters
-
mixed
$var
required - Variable to clean
-
string
$charset
= NULL - Character set, defaults to Kohana::$charset
Tags
Return Values
Source Code
/**
 * Recursively cleans arrays, objects, and strings. Removes ASCII control
 * codes and runs non-ASCII strings through iconv with "//IGNORE" so that
 * characters incompatible with the charset are silently discarded.
 *
 * Keys are cleaned as well as values. When cleaning changes a key, the
 * cleaned entry is written next to the original one; the original key is
 * not removed.
 *
 * @param   mixed   $var      variable to clean
 * @param   string  $charset  character set, defaults to Kohana::$charset
 * @return  mixed   the cleaned variable; values that are not arrays,
 *                  objects or non-empty strings are returned untouched
 */
public static function clean($var, $charset = NULL)
{
	if ( ! $charset)
	{
		// Fall back to the application-wide charset
		$charset = Kohana::$charset;
	}

	if (is_array($var) OR is_object($var))
	{
		// Array-offset writes are only legal on arrays and ArrayAccess
		// objects; any other object must be written through properties.
		$as_property = (is_object($var) AND ! ($var instanceof ArrayAccess));

		foreach ($var as $key => $val)
		{
			// Pass the charset down so nested data is cleaned with the
			// charset the caller asked for, not the global default.
			$clean_key = self::clean($key, $charset);
			$clean_val = self::clean($val, $charset);

			if ($as_property)
			{
				$var->{$clean_key} = $clean_val;
			}
			else
			{
				$var[$clean_key] = $clean_val;
			}
		}
	}
	elseif (is_string($var) AND $var !== '')
	{
		// Remove control characters
		$var = self::strip_ascii_ctrl($var);

		if ( ! self::is_ascii($var))
		{
			// iconv raises a notice for every discarded byte sequence;
			// silence notices for the duration of the call only.
			$error_reporting = error_reporting(~E_NOTICE);

			try
			{
				$var = iconv($charset, $charset.'//IGNORE', $var);
			}
			catch (Exception $e)
			{
				// An error handler may turn other iconv errors into
				// exceptions: restore the reporting level before leaving.
				error_reporting($error_reporting);
				throw $e;
			}

			// Turn notices back on
			error_reporting($error_reporting);
		}
	}

	return $var;
}
|
public static from_unicode( array $arr )
(defined in Kohana_UTF8)
link to this
Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
Occurrences of the BOM are ignored. Surrogates are not allowed.
$str = UTF8::from_unicode( $array );
|
The Original Code is Mozilla Communicator client code.
The Initial Developer of the Original Code is Netscape Communications Corporation.
Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
Ported to PHP by Henri Sivonen hsivonen@iki.fi, see http://hsivonen.iki.fi/php-utf8/
Slight modifications to fit with phputf8 library by Harry Fuecks hfuecks@gmail.com.
Parameters
-
array
$arr
required - Unicode code points representing a string
Return Values
string
- Utf8 string of characters
boolean
- FALSE if a code point cannot be found
Source Code
/**
 * Takes an array of ints representing Unicode characters and returns a
 * UTF-8 string.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _from_unicode(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'from_unicode') resolves to.
 *
 * @param   array  $arr  Unicode code points representing a string
 * @return  mixed  result of _from_unicode(); documented as a UTF-8 string,
 *                 or FALSE if a code point cannot be found
 */
public static function from_unicode($arr)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _from_unicode($arr);
}
|
public static is_ascii( mixed $str )
(defined in Kohana_UTF8)
link to this
Tests whether a string contains only 7-bit ASCII bytes. This is used to
determine when to use native functions or UTF-8 functions.
$ascii = UTF8::is_ascii( $str );
|
Parameters
-
mixed
$str
required - String or array of strings to check
Return Values
Source Code
/**
 * Tests whether a string contains only 7-bit ASCII bytes.
 *
 * An array argument is concatenated (no separator) and checked as a
 * single string.
 *
 * @param   mixed    $str  string or array of strings to check
 * @return  boolean  TRUE when no byte outside \x00-\x7F is found
 */
public static function is_ascii($str)
{
	// Collapse an array into one subject so a single match suffices.
	$subject = is_array($str) ? implode($str) : $str;

	// A match means at least one byte has the high bit set.
	return ! preg_match('/[^\x00-\x7F]/S', $subject);
}
|
public static ltrim( string $str [, string $charlist = NULL ] )
(defined in Kohana_UTF8)
link to this
Strips whitespace (or other UTF-8 characters) from the beginning of
a string. This is a UTF8-aware version of ltrim.
$str = UTF8::ltrim( $str );
|
Parameters
-
string
$str
required - Input string
-
string
$charlist
= NULL - String of characters to remove
Tags
Return Values
Source Code
/**
 * Strips whitespace (or other UTF-8 characters) from the beginning of a
 * string. UTF8-aware counterpart of ltrim.
 *
 * Thin dispatcher: the work is done by the procedural function _ltrim(),
 * which is presumably defined by the file that
 * Kohana::find_file('utf8', 'ltrim') resolves to.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  mixed   result of _ltrim()
 */
public static function ltrim($str, $charlist = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _ltrim($str, $charlist);
}
|
public static ord( string $chr )
(defined in Kohana_UTF8)
link to this
Returns the unicode ordinal for a character. This is a UTF8-aware
version of ord.
$digit = UTF8::ord( $character );
|
Parameters
-
string
$chr
required - UTF-8 encoded character
Tags
Return Values
Source Code
/**
 * Returns the unicode ordinal for a character. UTF8-aware counterpart
 * of ord.
 *
 * Thin dispatcher: the work is done by the procedural function _ord(),
 * which is presumably defined by the file that
 * Kohana::find_file('utf8', 'ord') resolves to.
 *
 * @param   string  $chr  UTF-8 encoded character
 * @return  mixed   result of _ord()
 */
public static function ord($chr)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _ord($chr);
}
|
public static rtrim( string $str [, string $charlist = NULL ] )
(defined in Kohana_UTF8)
link to this
Strips whitespace (or other UTF-8 characters) from the end of a string.
This is a UTF8-aware version of rtrim.
$str = UTF8::rtrim( $str );
|
Parameters
-
string
$str
required - Input string
-
string
$charlist
= NULL - String of characters to remove
Tags
Return Values
Source Code
/**
 * Strips whitespace (or other UTF-8 characters) from the end of a string.
 * UTF8-aware counterpart of rtrim.
 *
 * Thin dispatcher: the work is done by the procedural function _rtrim(),
 * which is presumably defined by the file that
 * Kohana::find_file('utf8', 'rtrim') resolves to.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  mixed   result of _rtrim()
 */
public static function rtrim($str, $charlist = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _rtrim($str, $charlist);
}
|
public static str_ireplace( string|array $search , string|array $replace , string|array $str [, integer & $count = NULL ] )
(defined in Kohana_UTF8)
link to this
Returns a string or an array with all occurrences of search in subject
(ignoring case) and replaced with the given replace value. This is a
UTF8-aware version of str_ireplace.
This function is very slow compared to the native version. Avoid
using it when possible.
Parameters
-
string|array
$search
required - Text to replace
-
string|array
$replace
required - Replacement text
-
string|array
$str
required - Subject text
-
byref integer
$count
= NULL - Number of matched and replaced needles will be returned via this parameter which is passed by reference
Tags
public static str_pad( string $str , integer $final_str_length [, string $pad_str = string(1) " " , string $pad_type = integer 1 ] )
(defined in Kohana_UTF8)
link to this
Pads a UTF-8 string to a certain length with another string. This is a
UTF8-aware version of str_pad.
$str = UTF8:: str_pad ( $str , $length );
|
Parameters
-
string
$str
required - Input string
-
integer
$final_str_length
required - Desired string length after padding
-
string
$pad_str
= string(1) " " - String to use as padding
-
string
$pad_type
= integer 1 - Padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
Tags
Return Values
Source Code
/**
 * Pads a UTF-8 string to a certain length with another string.
 * UTF8-aware counterpart of str_pad.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _str_pad(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'str_pad') resolves to.
 *
 * @param   string   $str               input string
 * @param   integer  $final_str_length  desired string length after padding
 * @param   string   $pad_str           string to use as padding
 * @param   integer  $pad_type          STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
 * @return  mixed    result of _str_pad()
 */
public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _str_pad($str, $final_str_length, $pad_str, $pad_type);
}
|
public static str_split( string $str [, integer $split_length = integer 1 ] )
(defined in Kohana_UTF8)
link to this
Converts a UTF-8 string to an array. This is a UTF8-aware version of
str_split.
$array = UTF8:: str_split ( $str );
|
Parameters
-
string
$str
required - Input string
-
integer
$split_length
= integer 1 - Maximum length of each chunk
Tags
Return Values
Source Code
/**
 * Converts a UTF-8 string to an array. UTF8-aware counterpart of
 * str_split.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _str_split(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'str_split') resolves to.
 *
 * @param   string   $str           input string
 * @param   integer  $split_length  maximum length of each chunk
 * @return  mixed    result of _str_split()
 */
public static function str_split($str, $split_length = 1)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _str_split($str, $split_length);
}
|
public static strcasecmp( string $str1 , string $str2 )
(defined in Kohana_UTF8)
link to this
Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
of strcasecmp.
$compare = UTF8:: strcasecmp ( $str1 , $str2 );
|
Parameters
-
string
$str1
required - String to compare
-
string
$str2
required - String to compare
Tags
Return Values
integer
- Less than 0 if str1 is less than str2
integer
- Greater than 0 if str1 is greater than str2
integer
- 0 if they are equal
Source Code
/**
 * Case-insensitive UTF-8 string comparison. UTF8-aware counterpart of
 * strcasecmp.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _strcasecmp(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strcasecmp') resolves to.
 *
 * @param   string  $str1  string to compare
 * @param   string  $str2  string to compare
 * @return  mixed   result of _strcasecmp(); documented as an integer that
 *                  is < 0, > 0 or 0 when str1 is less than, greater than
 *                  or equal to str2
 */
public static function strcasecmp($str1, $str2)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strcasecmp($str1, $str2);
}
|
public static strcspn( string $str , string $mask [, integer $offset = NULL , integer $length = NULL ] )
(defined in Kohana_UTF8)
link to this
Finds the length of the initial segment not matching mask. This is a
UTF8-aware version of strcspn.
$found = UTF8:: strcspn ( $str , $mask );
|
Parameters
-
string
$str
required - Input string
-
string
$mask
required - Mask for search
-
integer
$offset
= NULL - Start position of the string to examine
-
integer
$length
= NULL - Length of the string to examine
Tags
Return Values
integer
- Length of the initial segment that contains characters not in the mask
Source Code
/**
 * Finds the length of the initial segment not matching mask. UTF8-aware
 * counterpart of strcspn.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _strcspn(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strcspn') resolves to.
 *
 * @param   string   $str     input string
 * @param   string   $mask    mask for search
 * @param   integer  $offset  start position of the string to examine
 * @param   integer  $length  length of the string to examine
 * @return  mixed    result of _strcspn(); documented as the length of the
 *                   initial segment containing characters not in the mask
 */
public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strcspn($str, $mask, $offset, $length);
}
|
public static strip_ascii_ctrl( string $str )
(defined in Kohana_UTF8)
link to this
Strips out device control codes in the ASCII range.
$str = UTF8::strip_ascii_ctrl( $str );
|
Parameters
-
string
$str
required - String to clean
Return Values
Source Code
/**
 * Strips out device control codes in the ASCII range.
 *
 * Removes bytes \x00-\x08, \x0B, \x0C, \x0E-\x1F and \x7F. Tab (\x09),
 * line feed (\x0A) and carriage return (\x0D) are not in the pattern and
 * are therefore kept.
 *
 * @param   string  $str  string to clean
 * @return  mixed   result of preg_replace()
 */
public static function strip_ascii_ctrl($str)
{
	$control_codes = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S';

	return preg_replace($control_codes, '', $str);
}
|
public static strip_non_ascii( string $str )
(defined in Kohana_UTF8)
link to this
Strips out all non-7bit ASCII bytes.
$str = UTF8::strip_non_ascii( $str );
|
Parameters
-
string
$str
required - String to clean
Return Values
Source Code
/**
 * Strips out all non-7bit ASCII bytes.
 *
 * Every run of bytes outside \x00-\x7F is replaced with the empty string.
 *
 * @param   string  $str  string to clean
 * @return  mixed   result of preg_replace()
 */
public static function strip_non_ascii($str)
{
	$high_bytes = '/[^\x00-\x7F]+/S';

	return preg_replace($high_bytes, '', $str);
}
|
public static stristr( string $str , string $search )
(defined in Kohana_UTF8)
link to this
Case-insensitive UTF-8 version of strstr. Returns all of input string
from the first occurrence of needle to the end. This is a UTF8-aware
version of stristr.
$found = UTF8:: stristr ( $str , $search );
|
Parameters
-
string
$str
required - Input string
-
string
$search
required - Needle
Tags
Return Values
string
- Matched substring if found
FALSE
- If the substring was not found
Source Code
/**
 * Case-insensitive UTF-8 version of strstr. UTF8-aware counterpart of
 * stristr.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _stristr(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'stristr') resolves to.
 *
 * @param   string  $str     input string
 * @param   string  $search  needle
 * @return  mixed   result of _stristr(); documented as the matched
 *                  substring, or FALSE if the substring was not found
 */
public static function stristr($str, $search)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _stristr($str, $search);
}
|
public static strlen( string $str )
(defined in Kohana_UTF8)
link to this
Returns the length of the given string. This is a UTF8-aware version
of strlen.
$length = UTF8:: strlen ( $str );
|
Parameters
-
string
$str
required - String being measured for length
Tags
Return Values
Source Code
/**
 * Returns the length of the given string. UTF8-aware counterpart of
 * strlen.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by mb_strlen()
 * using Kohana::$charset. Otherwise the procedural function _strlen() is
 * used, which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strlen') resolves to.
 *
 * @param   string  $str  string being measured for length
 * @return  mixed   result of mb_strlen() or _strlen()
 */
public static function strlen($str)
{
	// Fast path: let mbstring do the counting.
	if (UTF8::$server_utf8)
	{
		return mb_strlen($str, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strlen($str);
}
|
public static strpos( string $str , string $search [, integer $offset = integer 0 ] )
(defined in Kohana_UTF8)
link to this
Finds position of first occurrence of a UTF-8 string. This is a
UTF8-aware version of strpos.
$position = UTF8:: strpos ( $str , $search );
|
Parameters
-
string
$str
required - Haystack
-
string
$search
required - Needle
-
integer
$offset
= integer 0 - Offset from which character in haystack to start searching
Tags
Return Values
integer
- Position of needle
boolean
- FALSE if the needle is not found
Source Code
/**
 * Finds position of first occurrence of a UTF-8 string. UTF8-aware
 * counterpart of strpos.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by mb_strpos()
 * using Kohana::$charset. Otherwise the procedural function _strpos() is
 * used, which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strpos') resolves to.
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which character in haystack to start searching
 * @return  mixed    result of mb_strpos() or _strpos(); documented as the
 *                   position of the needle, or FALSE if it is not found
 */
public static function strpos($str, $search, $offset = 0)
{
	// Fast path: let mbstring do the search.
	if (UTF8::$server_utf8)
	{
		return mb_strpos($str, $search, $offset, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strpos($str, $search, $offset);
}
|
public static strrev( string $str )
(defined in Kohana_UTF8)
link to this
Reverses a UTF-8 string. This is a UTF8-aware version of strrev.
$str = UTF8:: strrev ( $str );
|
Parameters
-
string
$str
required - String to be reversed
Tags
Return Values
Source Code
/**
 * Reverses a UTF-8 string. UTF8-aware counterpart of strrev.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _strrev(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strrev') resolves to.
 *
 * @param   string  $str  string to be reversed
 * @return  mixed   result of _strrev()
 */
public static function strrev($str)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strrev($str);
}
|
public static strrpos( string $str , string $search [, integer $offset = integer 0 ] )
(defined in Kohana_UTF8)
link to this
Finds position of last occurrence of a char in a UTF-8 string. This is
a UTF8-aware version of strrpos.
$position = UTF8:: strrpos ( $str , $search );
|
Parameters
-
string
$str
required - Haystack
-
string
$search
required - Needle
-
integer
$offset
= integer 0 - Offset from which character in haystack to start searching
Tags
Return Values
integer
- Position of needle
boolean
- FALSE if the needle is not found
Source Code
/**
 * Finds position of last occurrence of a char in a UTF-8 string.
 * UTF8-aware counterpart of strrpos.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by mb_strrpos()
 * using Kohana::$charset. Otherwise the procedural function _strrpos() is
 * used, which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strrpos') resolves to.
 *
 * @param   string   $str     haystack
 * @param   string   $search  needle
 * @param   integer  $offset  offset from which character in haystack to start searching
 * @return  mixed    result of mb_strrpos() or _strrpos(); documented as
 *                   the position of the needle, or FALSE if it is not found
 */
public static function strrpos($str, $search, $offset = 0)
{
	// Fast path: let mbstring do the search.
	if (UTF8::$server_utf8)
	{
		return mb_strrpos($str, $search, $offset, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strrpos($str, $search, $offset);
}
|
public static strspn( string $str , string $mask [, integer $offset = NULL , integer $length = NULL ] )
(defined in Kohana_UTF8)
link to this
Finds the length of the initial segment matching mask. This is a
UTF8-aware version of strspn.
$found = UTF8:: strspn ( $str , $mask );
|
Parameters
-
string
$str
required - Input string
-
string
$mask
required - Mask for search
-
integer
$offset
= NULL - Start position of the string to examine
-
integer
$length
= NULL - Length of the string to examine
Tags
Return Values
integer
- Length of the initial segment that contains characters in the mask
Source Code
/**
 * Finds the length of the initial segment matching mask. UTF8-aware
 * counterpart of strspn.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _strspn(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'strspn') resolves to.
 *
 * @param   string   $str     input string
 * @param   string   $mask    mask for search
 * @param   integer  $offset  start position of the string to examine
 * @param   integer  $length  length of the string to examine
 * @return  mixed    result of _strspn(); documented as the length of the
 *                   initial segment containing characters in the mask
 */
public static function strspn($str, $mask, $offset = NULL, $length = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strspn($str, $mask, $offset, $length);
}
|
public static strtolower( string $str )
(defined in Kohana_UTF8)
link to this
Makes a UTF-8 string lowercase. This is a UTF8-aware version
of strtolower.
$str = UTF8:: strtolower ( $str );
|
Parameters
-
string
$str
required - Mixed case string
Tags
Return Values
Source Code
/**
 * Makes a UTF-8 string lowercase. UTF8-aware counterpart of strtolower.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by
 * mb_strtolower() using Kohana::$charset. Otherwise the procedural
 * function _strtolower() is used, which is presumably defined by the file
 * that Kohana::find_file('utf8', 'strtolower') resolves to.
 *
 * @param   string  $str  mixed case string
 * @return  mixed   result of mb_strtolower() or _strtolower()
 */
public static function strtolower($str)
{
	// Fast path: let mbstring do the case folding.
	if (UTF8::$server_utf8)
	{
		return mb_strtolower($str, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strtolower($str);
}
|
public static strtoupper( string $str )
(defined in Kohana_UTF8)
link to this
Makes a UTF-8 string uppercase. This is a UTF8-aware version
of strtoupper.
Parameters
-
string
$str
required - Mixed case string
Tags
Return Values
Source Code
/**
 * Makes a UTF-8 string uppercase. UTF8-aware counterpart of strtoupper.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by
 * mb_strtoupper() using Kohana::$charset. Otherwise the procedural
 * function _strtoupper() is used, which is presumably defined by the file
 * that Kohana::find_file('utf8', 'strtoupper') resolves to.
 *
 * @param   string  $str  mixed case string
 * @return  mixed   result of mb_strtoupper() or _strtoupper()
 */
public static function strtoupper($str)
{
	// Fast path: let mbstring do the case folding.
	if (UTF8::$server_utf8)
	{
		return mb_strtoupper($str, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _strtoupper($str);
}
|
public static substr( string $str , integer $offset [, integer $length = NULL ] )
(defined in Kohana_UTF8)
link to this
Returns part of a UTF-8 string. This is a UTF8-aware version
of substr.
$sub = UTF8:: substr ( $str , $offset );
|
Parameters
-
string
$str
required - Input string
-
integer
$offset
required - Offset
-
integer
$length
= NULL - Length limit
Tags
Return Values
Source Code
/**
 * Returns part of a UTF-8 string. UTF8-aware counterpart of substr.
 *
 * When UTF8::$server_utf8 is truthy the call is answered by mb_substr()
 * using Kohana::$charset. Otherwise the procedural function _substr() is
 * used, which is presumably defined by the file that
 * Kohana::find_file('utf8', 'substr') resolves to.
 *
 * @param   string   $str     input string
 * @param   integer  $offset  offset
 * @param   integer  $length  length limit; NULL means "to the end of the string"
 * @return  mixed    result of mb_substr() or _substr()
 */
public static function substr($str, $offset, $length = NULL)
{
	if (UTF8::$server_utf8)
	{
		if ($length === NULL)
		{
			// Measure the string in the same charset that mb_substr() is
			// told to use. Without the charset argument mb_strlen() counts
			// in mbstring's internal encoding, which can yield a smaller
			// number and truncate the result.
			$length = mb_strlen($str, Kohana::$charset);
		}

		return mb_substr($str, $offset, $length, Kohana::$charset);
	}

	// Fallback: pull in the implementation file on the first call only.
	if ( ! isset(self::$called[__FUNCTION__]))
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Function has been called
		self::$called[__FUNCTION__] = TRUE;
	}

	return _substr($str, $offset, $length);
}
|
public static substr_replace( string $str , string $replacement , integer $offset [, $length = NULL ] )
(defined in Kohana_UTF8)
link to this
Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
version of substr_replace.
$str = UTF8::substr_replace( $str , $replacement , $offset );
|
Parameters
-
string
$str
required - Input string
-
string
$replacement
required - Replacement string
-
integer
$offset
required - Offset
-
unknown
$length
= NULL
Tags
Return Values
Source Code
/**
 * Replaces text within a portion of a UTF-8 string. UTF8-aware
 * counterpart of substr_replace.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _substr_replace(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'substr_replace') resolves to.
 *
 * @param   string   $str          input string
 * @param   string   $replacement  replacement string
 * @param   integer  $offset       offset
 * @param   mixed    $length       forwarded unchanged to _substr_replace()
 * @return  mixed    result of _substr_replace()
 */
public static function substr_replace($str, $replacement, $offset, $length = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _substr_replace($str, $replacement, $offset, $length);
}
|
public static to_unicode( string $str )
(defined in Kohana_UTF8)
link to this
Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
Occurrences of the BOM are ignored. Surrogates are not allowed.
$array = UTF8::to_unicode( $str );
|
The Original Code is Mozilla Communicator client code.
The Initial Developer of the Original Code is Netscape Communications Corporation.
Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
Ported to PHP by Henri Sivonen hsivonen@iki.fi, see http://hsivonen.iki.fi/php-utf8/
Slight modifications to fit with phputf8 library by Harry Fuecks hfuecks@gmail.com
Parameters
-
string
$str
required - UTF-8 encoded string
Return Values
array
- Unicode code points
FALSE
- If the string is invalid
Source Code
/**
 * Takes a UTF-8 string and returns an array of ints representing the
 * Unicode characters.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _to_unicode(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'to_unicode') resolves to.
 *
 * @param   string  $str  UTF-8 encoded string
 * @return  mixed   result of _to_unicode(); documented as an array of
 *                  Unicode code points, or FALSE if the string is invalid
 */
public static function to_unicode($str)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _to_unicode($str);
}
|
public static transliterate_to_ascii( string $str [, integer $case = integer 0 ] )
(defined in Kohana_UTF8)
link to this
Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
$ascii = UTF8::transliterate_to_ascii( $utf8 );
|
Parameters
-
string
$str
required - String to transliterate
-
integer
$case
= integer 0 - -1 lowercase only, +1 uppercase only, 0 both cases
Tags
Return Values
Source Code
/**
 * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
 *
 * Thin dispatcher: the work is done by the procedural function
 * _transliterate_to_ascii(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'transliterate_to_ascii') resolves to.
 *
 * @param   string   $str   string to transliterate
 * @param   integer  $case  -1 lowercase only, +1 uppercase only, 0 both cases
 * @return  mixed    result of _transliterate_to_ascii()
 */
public static function transliterate_to_ascii($str, $case = 0)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _transliterate_to_ascii($str, $case);
}
|
public static trim( string $str [, string $charlist = NULL ] )
(defined in Kohana_UTF8)
link to this
Strips whitespace (or other UTF-8 characters) from the beginning and
end of a string. This is a UTF8-aware version of trim.
Parameters
-
string
$str
required - Input string
-
string
$charlist
= NULL - String of characters to remove
Tags
Return Values
Source Code
/**
 * Strips whitespace (or other UTF-8 characters) from the beginning and
 * end of a string. UTF8-aware counterpart of trim.
 *
 * Thin dispatcher: the work is done by the procedural function _trim(),
 * which is presumably defined by the file that
 * Kohana::find_file('utf8', 'trim') resolves to.
 *
 * @param   string  $str       input string
 * @param   string  $charlist  string of characters to remove
 * @return  mixed   result of _trim()
 */
public static function trim($str, $charlist = NULL)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _trim($str, $charlist);
}
|
public static ucfirst( string $str )
(defined in Kohana_UTF8)
link to this
Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
version of ucfirst.
$str = UTF8::ucfirst( $str );
|
Parameters
-
string
$str
required - Mixed case string
Tags
Return Values
Source Code
/**
 * Makes a UTF-8 string's first character uppercase. UTF8-aware
 * counterpart of ucfirst.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _ucfirst(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'ucfirst') resolves to.
 *
 * @param   string  $str  mixed case string
 * @return  mixed   result of _ucfirst()
 */
public static function ucfirst($str)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _ucfirst($str);
}
|
public static ucwords( string $str )
(defined in Kohana_UTF8)
link to this
Makes the first character of every word in a UTF-8 string uppercase.
This is a UTF8-aware version of ucwords.
$str = UTF8::ucwords( $str );
|
Parameters
-
string
$str
required - Mixed case string
Tags
Return Values
Source Code
/**
 * Makes the first character of every word in a UTF-8 string uppercase.
 * UTF8-aware counterpart of ucwords.
 *
 * Thin dispatcher: the work is done by the procedural function
 * _ucwords(), which is presumably defined by the file that
 * Kohana::find_file('utf8', 'ucwords') resolves to.
 *
 * @param   string  $str  mixed case string
 * @return  mixed   result of _ucwords()
 */
public static function ucwords($str)
{
	// The implementation file is pulled in on the first call only.
	if (isset(self::$called[__FUNCTION__]) === FALSE)
	{
		require Kohana::find_file('utf8', __FUNCTION__);

		// Flag the method so later calls skip the file lookup.
		self::$called[__FUNCTION__] = TRUE;
	}

	return _ucwords($str);
}
|