This commit is contained in:
jygaulier
2013-05-07 20:18:03 +02:00
parent 78326b3da1
commit 6990b004ba
2 changed files with 23 additions and 17 deletions

View File

@@ -16,13 +16,13 @@
*/ */
class unicode class unicode
{ {
// second parameter to utf8_convert_to(...) // second argument to convert_to(...)
const CONVERT_TO_LC = 'lc'; // lowercase const CONVERT_TO_LC = 'lc'; // lowercase
const CONVERT_TO_ND = 'nd'; // no-diacritics const CONVERT_TO_ND = 'nd'; // no-diacritics
const CONVERT_TO_LCND = 'lcnd'; // lowercase no-diacritics const CONVERT_TO_LCND = 'lcnd'; // lowercase no-diacritics
static protected $map = array( static protected $map = array(
'lc' => array( self::CONVERT_TO_LC => array(
"\x41" => "\x61" , /* U+0041: LATIN CAPITAL LETTER A -> U+0061: LATIN SMALL LETTER A */ "\x41" => "\x61" , /* U+0041: LATIN CAPITAL LETTER A -> U+0061: LATIN SMALL LETTER A */
"\x42" => "\x62" , /* U+0042: LATIN CAPITAL LETTER B -> U+0062: LATIN SMALL LETTER B */ "\x42" => "\x62" , /* U+0042: LATIN CAPITAL LETTER B -> U+0062: LATIN SMALL LETTER B */
"\x43" => "\x63" , /* U+0043: LATIN CAPITAL LETTER C -> U+0063: LATIN SMALL LETTER C */ "\x43" => "\x63" , /* U+0043: LATIN CAPITAL LETTER C -> U+0063: LATIN SMALL LETTER C */
@@ -489,7 +489,7 @@ class unicode
"\xD5\x96" => "\xD6\x86" /* U+0556: ARMENIAN CAPITAL LETTER FEH -> U+0586: ARMENIAN SMALL LETTER FEH */ "\xD5\x96" => "\xD6\x86" /* U+0556: ARMENIAN CAPITAL LETTER FEH -> U+0586: ARMENIAN SMALL LETTER FEH */
), ),
'nd' => array( self::CONVERT_TO_ND => array(
"\xC2\xA0" => "\x20" , /* U+00A0: NO-BREAK SPACE -> U+0020: SPACE */ "\xC2\xA0" => "\x20" , /* U+00A0: NO-BREAK SPACE -> U+0020: SPACE */
"\xC2\xA8" => "\x20" , /* U+00A8: DIAERESIS -> U+0020: SPACE */ "\xC2\xA8" => "\x20" , /* U+00A8: DIAERESIS -> U+0020: SPACE */
"\xC2\xAA" => "\x61" , /* U+00AA: FEMININE ORDINAL INDICATOR -> U+0061: LATIN SMALL LETTER A */ "\xC2\xAA" => "\x61" , /* U+00AA: FEMININE ORDINAL INDICATOR -> U+0061: LATIN SMALL LETTER A */
@@ -867,7 +867,7 @@ class unicode
"\xD3\xB9" => "\xD1\x8B" /* U+04F9: CYRILLIC SMALL LETTER YERU WITH DIAERESIS -> U+044B: CYRILLIC SMALL LETTER YERU */ "\xD3\xB9" => "\xD1\x8B" /* U+04F9: CYRILLIC SMALL LETTER YERU WITH DIAERESIS -> U+044B: CYRILLIC SMALL LETTER YERU */
), ),
'lcnd' => array( self::CONVERT_TO_LCND => array(
"\x41" => "\x61" , /* U+0041: LATIN CAPITAL LETTER A -> U+0061: LATIN SMALL LETTER A */ "\x41" => "\x61" , /* U+0041: LATIN CAPITAL LETTER A -> U+0061: LATIN SMALL LETTER A */
"\x42" => "\x62" , /* U+0042: LATIN CAPITAL LETTER B -> U+0062: LATIN SMALL LETTER B */ "\x42" => "\x62" , /* U+0042: LATIN CAPITAL LETTER B -> U+0062: LATIN SMALL LETTER B */
"\x43" => "\x63" , /* U+0043: LATIN CAPITAL LETTER C -> U+0063: LATIN SMALL LETTER C */ "\x43" => "\x63" , /* U+0043: LATIN CAPITAL LETTER C -> U+0063: LATIN SMALL LETTER C */
@@ -1572,10 +1572,16 @@ class unicode
* @throws Exception_InvalidArgument * @throws Exception_InvalidArgument
* *
*/ */
public function utf8_convert_to($s, $method) public function convert_to($s, $method)
{ {
if (!in_array($method, array(self::CONVERT_TO_LC, self::CONVERT_TO_ND, self::CONVERT_TO_LCND))) { $ok_methods = array_keys(self::$map);
throw new Exception_InvalidArgument('Wrong method "' . $method . '" to unicode::utf8_convert_to(...).'); if (!in_array($method, $ok_methods)) {
throw new Exception_InvalidArgument(
sprintf('Invalid argument 2 "%s", valid values are [%s].'
, $method
, implode('|', $ok_methods)
)
);
} }
if (function_exists('phrasea_utf8_convert_to')) { if (function_exists('phrasea_utf8_convert_to')) {
// function exists in phrasea extension // function exists in phrasea extension
@@ -1594,7 +1600,7 @@ class unicode
{ {
$so = ""; $so = "";
$string = $this->utf8_convert_to($string, 'lcnd'); $string = $this->convert_to($string, 'lcnd');
$l = mb_strlen($string, "UTF-8"); $l = mb_strlen($string, "UTF-8");
$lastwasblank = false; $lastwasblank = false;
@@ -1615,7 +1621,7 @@ class unicode
public function remove_diacritics($string) public function remove_diacritics($string)
{ {
return $this->utf8_convert_to($string, 'nd'); return $this->convert_to($string, 'nd');
} }
public function remove_nonazAZ09($string, $keep_underscores = true, $keep_minus = true, $keep_dot = false) public function remove_nonazAZ09($string, $keep_underscores = true, $keep_minus = true, $keep_dot = false)

View File

@@ -14,22 +14,22 @@ class unicodeTest extends PhraseanetPHPUnitAbstract
} }
/** /**
* @covers \unicode::testUtf8_convert_to * @covers \unicode::convert_to
*/ */
public function testUtf8_convert_to() public function testConvert_to()
{ {
$this->assertEquals('éléphant à rôtir', $this->object->utf8_convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_LC)); $this->assertEquals('éléphant à rôtir', $this->object->convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_LC));
$this->assertEquals('ELEPHANT a rotir', $this->object->utf8_convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_ND)); $this->assertEquals('ELEPHANT a rotir', $this->object->convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_ND));
$this->assertEquals('elephant a rotir', $this->object->utf8_convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_LCND)); $this->assertEquals('elephant a rotir', $this->object->convert_to('ÉLÉPHANT à rôtir', unicode::CONVERT_TO_LCND));
} }
/** /**
* @covers \unicode::testUtf8_convert_to * @covers \unicode::convert_to
* @expectedException Exception_InvalidArgument * @expectedException Exception_InvalidArgument
*/ */
public function testUtf8_convert_to_ex() public function testConvert_to_ex()
{ {
$this->assertEquals('éléphant à rôtir', $this->object->utf8_convert_to('ÉLÉPHANT à rôtir', 'bad-method')); $this->object->Convert_to('ÉLÉPHANT à rôtir', 'unexistant contant');
} }
/** /**