diff --git a/lib/classes/module/console/sphinxGenerateSuggestion.class.php b/lib/classes/module/console/sphinxGenerateSuggestion.class.php new file mode 100644 index 0000000000..5aec86995a --- /dev/null +++ b/lib/classes/module/console/sphinxGenerateSuggestion.class.php @@ -0,0 +1,148 @@ +setDescription('Generate suggestions for Sphinx Search Engine'); + + + return $this; + } + +
  /**
   * Rebuild the `suggest` table of every databox from a Sphinx word-frequency
   * dictionary.
   *
   * For each entry returned by phrasea::sbas_params() the method:
   *  1. derives the index suffix: crc32 of "host_port_user_dbname" with '.'
   *     and '%' replaced by '_';
   *  2. runs the Sphinx indexer with --buildstops / --buildfreqs so that it
   *     dumps words and frequencies into GV_RootPath . 'tmp/dict<suffix>.txt';
   *  3. reads that file and deletes it;
   *  4. truncates `suggest` and executes the INSERT produced by
   *     BuildDictionarySQL(), if any.
   *
   * A databox for which no dictionary could be read, or whose connection
   * fails, is skipped with a message and its existing suggestions are left
   * untouched.
   *
   * @param InputInterface  $input  Not used by this command.
   * @param OutputInterface $output Receives the progress messages.
   *
   * @return int Always 0.
   */
  public function execute(InputInterface $input, OutputInterface $output)
  {
    // define() on an already-defined constant raises a notice/warning, so both
    // calls are guarded: execute() may run more than once in the same process.
    if (!defined('FREQ_THRESHOLD'))
    {
      define('FREQ_THRESHOLD', 10);
    }
    if (!defined('SUGGEST_DEBUG'))
    {
      define('SUGGEST_DEBUG', 0);
    }

    $appbox = \appbox::get_instance();
    $registry = $appbox->get_registry();

    $params = phrasea::sbas_params();

    foreach ($params as $sbas_id => $p)
    {
      $index = crc32(
        str_replace(
          array('.', '%')
          , '_'
          , sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname'])
        )
      );

      $tmp_file = $registry->get('GV_RootPath') . 'tmp/dict' . $index . '.txt';

      $databox = databox::get_instance($sbas_id);

      $output->writeln("process Databox " . $databox->get_viewname() . " / $index\n");

      // A dictionary left behind by an aborted run must not be mistaken for
      // the output of the indexer call below.
      if (is_file($tmp_file))
      {
        unlink($tmp_file);
      }

      // Every variable part is quoted so that a root path containing spaces or
      // shell metacharacters cannot split or alter the command line.
      $cmd = '/usr/local/bin/indexer ' . escapeshellarg('metadatas' . $index)
        . ' --buildstops ' . escapeshellarg($tmp_file)
        . ' 1000000 --buildfreqs';

      $cmd_output = array();
      $exit_code = 0;
      exec($cmd, $cmd_output, $exit_code);

      // Read the dictionary and remove the file right away: it is only a
      // transport for the indexer output, and deleting it here means no later
      // exit path (failed connection, SQL error) can leave it on disk.
      $dictionary = false;
      if (is_file($tmp_file))
      {
        $dictionary = file_get_contents($tmp_file);
        unlink($tmp_file);
      }

      // Without a dictionary there is nothing to import; skipping *before* the
      // TRUNCATE keeps the previous suggestions instead of wiping them.
      // NOTE(review): the exit code is only reported, not used as the failure
      // criterion, because indexer may exit non-zero for mere warnings -
      // confirm against the Sphinx version in use.
      if ($dictionary === false)
      {
        $output->writeln(
          sprintf(
            'No dictionary produced for index metadatas%s (indexer exit code %d), databox skipped'
            , $index
            , $exit_code
          )
        );
        continue;
      }

      try
      {
        $connbas = connection::getPDOConnection($sbas_id);
      }
      catch (Exception $e)
      {
        $output->writeln(sprintf('Unable to connect to databox %s, databox skipped', $sbas_id));
        continue;
      }

      $sql = 'TRUNCATE suggest';
      $stmt = $connbas->prepare($sql);
      $stmt->execute();
      $stmt->closeCursor();

      $sql = $this->BuildDictionarySQL($output, $dictionary);

      if (trim($sql) !== '')
      {
        $stmt = $connbas->prepare($sql);
        $stmt->execute();
        $stmt->closeCursor();
      }
    }

    return 0;
  }

  /**
   * Build the trigram list of a keyword.
   *
   * The keyword is padded with "__" on both sides, then every run of three
   * consecutive characters is emitted, each followed by a single space (the
   * result therefore ends with a space).
   * Example: "abc" gives "__a _ab abc bc_ c__ ".
   *
   * @param string $keyword
   *
   * @return string
   */
  protected function BuildTrigrams($keyword)
  {
    $padded = "__" . $keyword . "__";

    // mb_* (with the mbstring internal encoding, exactly like the mb_strlen()
    // length check in BuildDictionarySQL) keeps multi-byte characters whole;
    // byte-wise substr() would cut them in the middle and emit invalid
    // sequences. For single-byte input the result is unchanged.
    // NOTE(review): whatever builds trigrams on the query side must split the
    // same way - verify.
    $last_start = mb_strlen($padded) - 3;

    $trigrams = "";
    for ($i = 0; $i <= $last_start; $i++)
    {
      $trigrams .= mb_substr($padded, $i, 3) . " ";
    }

    return $trigrams;
  }

  /**
   * Turn a "keyword frequency" dictionary into one multi-row INSERT statement
   * for the `suggest` table.
   *
   * Each non-empty line must hold exactly two whitespace-separated fields.
   * A line is ignored when:
   *  - it does not have exactly two fields;
   *  - the frequency is not made of decimal digits only, or is lower than
   *    FREQ_THRESHOLD (10 when that constant is not defined);
   *  - the keyword contains "_", "'" or "\";
   *  - the keyword is made of digits only;
   *  - the keyword is shorter than 3 characters.
   *
   * Every kept line yields a row "( n, 'keyword', 'trigrams', freq )" where n
   * counts the kept rows starting at 1. The number of rows is also written to
   * $output.
   *
   * @param OutputInterface $output Receives the "Generated %d suggestions" line.
   * @param string          $in     Dictionary content; false/null is treated
   *                                as an empty dictionary.
   *
   * @return string The INSERT statement, or '' when no row was kept.
   */
  protected function BuildDictionarySQL(OutputInterface $output, $in)
  {
    // Do not depend on execute() having run first.
    $threshold = defined('FREQ_THRESHOLD') ? FREQ_THRESHOLD : 10;

    $out = '';
    $n = 0;

    foreach (explode("\n", (string) $in) as $line)
    {
      $line = trim($line);
      if ($line === '')
      {
        continue;
      }

      $fields = preg_split('/\s+/', $line);
      if (!is_array($fields) || count($fields) !== 2)
      {
        continue;
      }
      list($keyword, $freq) = $fields;

      // The frequency is written verbatim into the statement, so only plain
      // decimal digits are accepted before it is compared or used.
      if (!ctype_digit($freq))
      {
        continue;
      }
      $freq = (int) $freq;
      if ($freq < $threshold)
      {
        continue;
      }

      // The keyword is embedded in a quoted SQL literal. A quote would end the
      // literal; a backslash would escape the closing quote (assuming the
      // server treats backslash as an escape character, which is the MySQL
      // default - TODO confirm). "_" is the trigram padding character.
      if (strpos($keyword, '_') !== false
        || strpos($keyword, "'") !== false
        || strpos($keyword, '\\') !== false)
      {
        continue;
      }

      if (ctype_digit($keyword))
      {
        continue;
      }
      if (mb_strlen($keyword) < 3)
      {
        continue;
      }

      $trigrams = $this->BuildTrigrams($keyword);

      if ($n++)
      {
        $out .= ",\n";
      }
      $out .= "( $n, '$keyword', '$trigrams', $freq )";
    }

    if ($n > 0)
    {
      $out = "INSERT INTO suggest VALUES " . $out . ";";
    }

    $output->writeln(sprintf("Generated %d suggestions", $n));

    return $out;
  }

}