PHRAS-2748_bad-chars-cterms_4.1

fix: remove ASCII control characters (0...31, except tab, CR, LF) from cterms values
todo?: also clean up these characters in prod/editing and api/editing?
This commit is contained in:
Jean-Yves Gaulier
2019-11-05 17:59:03 +01:00
parent 28c55ddf7a
commit da532cc36a
2 changed files with 23 additions and 0 deletions

View File

@@ -63,4 +63,26 @@ class StringUtils
return self::$transliterator->transliterate($string);
}
/**
 * Substitute every ASCII control character (codes 0 to 31) found in the input,
 * leaving tab (9), line feed (10) and carriage return (13) untouched.
 *
 * @param mixed  $s       subject handed unchanged to str_replace()
 * @param string $replace text put in place of each control character (defaults to '_')
 * @return mixed the value returned by str_replace() for the given subject
 */
public static function substituteCtrlCharacters($s, $replace = '_')
{
    // The list of characters to replace is built on the first call only and reused afterwards.
    static $ctrlChars = null;

    if ($ctrlChars === null) {
        $keptCodes = [9, 10, 13];
        $ctrlChars = array_map('chr', array_diff(range(0, 31), $keptCodes));
    }

    return str_replace($ctrlChars, $replace, $s);
}
}

View File

@@ -32,6 +32,7 @@ class CandidateTerms
public function insert($field, $value)
{
$value = StringUtils::substituteCtrlCharacters($value, '');
$this->ensureVisitorSetup();
if (!$this->visitor->hasTerm($field, $value)) {
$this->new_candidates[$value] = $field;