@[\]^_`{|}~£§¨°";

    /**
     * Returns the list of characters stored in $endCharacters_utf8, i.e. the
     * characters that remove_indexer_chars() strips from its input.
     *
     * @return string
     */
    public function get_indexer_bad_chars()
    {
        return $this->endCharacters_utf8;
    }

    /**
     * Looks up $string inside the list of indexer "bad" characters.
     *
     * The return value is the raw result of mb_strpos(): the character
     * offset of the match, or false when there is none. Offset 0 is a valid
     * match, so callers must compare the result with "!== false" rather than
     * relying on truthiness.
     *
     * @param string $string
     * @return int|false
     */
    public function has_indexer_bad_char($string)
    {
        // The haystack is UTF-8; be explicit instead of depending on the
        // mbstring internal encoding.
        return mb_strpos($this->endCharacters_utf8, $string, 0, 'UTF-8');
    }

    /**
     * Normalizes a string for the indexer: the string is first passed through
     * phrasea_utf8_convert_to() in 'lcnd' mode, then every run of indexer
     * "bad" characters is collapsed into a single space. Leading and trailing
     * runs are dropped entirely.
     *
     * NOTE(review): 'lcnd' presumably means "lower case, no diacritics";
     * confirm against the phrasea extension.
     *
     * @param string $string
     * @return string
     */
    public function remove_indexer_chars($string)
    {
        $string = phrasea_utf8_convert_to($string, 'lcnd');

        $out = '';
        $pendingBlank = false;
        $length = mb_strlen($string, 'UTF-8');

        for ($i = 0; $i < $length; $i++) {
            $char = mb_substr($string, $i, 1, 'UTF-8');

            if (mb_strpos($this->endCharacters_utf8, $char, 0, 'UTF-8') !== false) {
                // Remember the separator; it is only emitted if a regular
                // character follows and something has already been output.
                $pendingBlank = true;
                continue;
            }

            if ($pendingBlank && $out !== '') {
                $out .= ' ';
            }
            $out .= $char;
            $pendingBlank = false;
        }

        return $out;
    }

    /**
     * Returns the result of phrasea_utf8_convert_to() in 'nd' mode.
     *
     * NOTE(review): 'nd' presumably means "no diacritics"; confirm against
     * the phrasea extension.
     *
     * @param string $string
     * @return string
     */
    public function remove_diacritics($string)
    {
        return phrasea_utf8_convert_to($string, 'nd');
    }

    /**
     * Strips diacritics, then removes every character that is not an ASCII
     * letter or digit. Minus, underscore and dot are kept only when the
     * matching flag is strictly true.
     *
     * @param string  $string
     * @param boolean $keep_underscores
     * @param boolean $keep_minus
     * @param boolean $keep_dot
     * @return string
     */
    public function remove_nonazAZ09($string, $keep_underscores = true, $keep_minus = true, $keep_dot = false)
    {
        $allowed = 'a-zA-Z0-9';

        if ($keep_minus === true) {
            $allowed .= '\-';
        }
        if ($keep_underscores === true) {
            $allowed .= '_';
        }
        if ($keep_dot === true) {
            $allowed .= '\.';
        }

        $string = (string) $this->remove_diacritics($string);

        // One pass with a negated class instead of one preg_match() per
        // character. Only ASCII bytes are allowed, so working byte-wise is
        // safe: bytes of multi-byte UTF-8 sequences never match the class.
        return (string) preg_replace('/[^' . $allowed . ']/', '', $string);
    }

    /**
     * Removes all digits at the beginning of a string.
     *
     * @example returns 'soleil' for '123soleil' and 'bb2' for '1bb2'
     *
     * @param string $string
     * @return string always a string; '' when the input holds only digits
     */
    public function remove_first_digits($string)
    {
        // ltrim() does this in one pass and never yields false, unlike
        // repeatedly calling substr($string, 1).
        return ltrim((string) $string, '0123456789');
    }

    /**
     * Guess the charset of a string and returns the UTF-8 version.
     *
     * - Input that is already valid UTF-8 is returned as is, unless its
     *   windows-1252 form contains one of the bytes listed in $macchars, in
     *   which case that form is re-read as MACINTOSH and converted to UTF-8.
     * - Any other input is converted from MACINTOSH when it contains one of
     *   those bytes, and from windows-1252 otherwise.
     *
     * @param string $string
     * @return string
     */
    public function toUTF8($string)
    {
        /**
         * 0x81-0x8F except 0x85 and 0x8C, 0x90-0x9F except 0x9C,
         * plus 0xBC, 0xBD, 0xBE
         */
        static $macchars = "\x81\x82\x83\x84\x86\x87\x88\x89\x8A\x8B\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9D\x9E\x9F\xBC\xBD\xBE";

        // A string that survives a UTF-8 -> UTF-32 -> UTF-8 round trip
        // unchanged is valid UTF-8.
        $roundTrip = mb_convert_encoding(mb_convert_encoding($string, 'UTF-32', 'UTF-8'), 'UTF-8', 'UTF-32');

        if ($roundTrip === $string) {
            $mac = mb_convert_encoding($string, 'windows-1252', 'UTF-8');

            if (strpbrk($mac, $macchars) !== false) {
                return iconv('MACINTOSH', 'UTF-8', $mac);
            }

            return $string;
        }

        if (strpbrk($string, $macchars) !== false) {
            return iconv('MACINTOSH', 'UTF-8', $string);
        }

        return iconv('windows-1252', 'UTF-8', $string);
    }

    /**
     * Replaces control characters (every code below 32 except 9, 10 and 13)
     * with the substitution string.
     *
     * @param string $string
     * @param string $substitution
     *
     * @return string
     */
    public function substituteCtrlCharacters($string, $substitution = '_')
    {
        // Built once per request and reused on later calls.
        static $chars_in = null;

        if ($chars_in === null) {
            $chars_in = array_map('chr', array_diff(range(0, 31), array(9, 10, 13)));
        }

        return str_replace($chars_in, $substitution, $string);
    }

    /**
     * Parse a string and try to return the date normalized as
     * 'YYYY/MM/DD HH:II:SS'. The separators '-', ':', '/' and '.' are all
     * treated as blanks; up to six integers are read in the order year,
     * month, day, hour, minute, second, and missing components are rendered
     * as zeros. Input from which no integer can be read yields
     * '0000/00/00 00:00:00'.
     *
     * @example usage :
     *
     *      //returns '2012/00/00 00:00:00'
     *      $unicode->parseDate('2012');
     *
     * @todo timezonify
     *
     * @param string $date
     * @return string
     */
    public function parseDate($date)
    {
        // Output format indexed by the number of components actually parsed.
        static $formats = array(
            1 => '%04d/00/00 00:00:00',
            2 => '%04d/%02d/00 00:00:00',
            3 => '%04d/%02d/%02d 00:00:00',
            4 => '%04d/%02d/%02d %02d:00:00',
            5 => '%04d/%02d/%02d %02d:%02d:00',
            6 => '%04d/%02d/%02d %02d:%02d:%02d',
        );

        $date = str_replace(array('-', ':', '/', '.'), ' ', $date);

        $yyyy = $mm = $dd = $hh = $ii = $ss = 0;
        $found = sscanf($date, '%d %d %d %d %d %d', $yyyy, $mm, $dd, $hh, $ii, $ss);

        if (!is_int($found) || !isset($formats[$found])) {
            return '0000/00/00 00:00:00';
        }

        return vsprintf($formats[$found], array_slice(array($yyyy, $mm, $dd, $hh, $ii, $ss), 0, $found));
    }
}