From a104cd879fbd5a8b23ea660c61d3942579c052b6 Mon Sep 17 00:00:00 2001 From: Romain Neutron Date: Mon, 27 Aug 2012 12:32:36 +0200 Subject: [PATCH] [SearchEngine] Rename directories --- .../SearchEngine/Phrasea/PhraseaEngine.php | 230 +- .../Phrasea/PhraseaEngineQueryParser.php | 188 +- .../Phrasea/SearchEngine/PhraseaEngine.php | 517 ----- .../SearchEngine/PhraseaEngineQueryParser.php | 1954 ----------------- .../SearchEngine/SearchEngineInterface.php | 2 + .../Phrasea/SearchEngine/SphinxSearch.php | 689 ------ .../SphinxSearch/SphinxSearchEngine.php | 225 +- 7 files changed, 259 insertions(+), 3546 deletions(-) delete mode 100644 lib/Alchemy/Phrasea/SearchEngine/PhraseaEngine.php delete mode 100644 lib/Alchemy/Phrasea/SearchEngine/PhraseaEngineQueryParser.php delete mode 100644 lib/Alchemy/Phrasea/SearchEngine/SphinxSearch.php diff --git a/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php index 754541a5ef..22c2a360d0 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngine.php @@ -11,86 +11,37 @@ namespace Alchemy\Phrasea\SearchEngine\Phrasea; -use Alchemy\Phrasea\Application; use Alchemy\Phrasea\SearchEngine\SearchEngineInterface; use Alchemy\Phrasea\SearchEngine\SearchEngineOptions; use Alchemy\Phrasea\SearchEngine\SearchEngineResult; use Alchemy\Phrasea\Exception\RuntimeException; use Doctrine\Common\Collections\ArrayCollection; +use Silex\Application; +use Symfony\Component\HttpFoundation\Request; class PhraseaEngine implements SearchEngineInterface { - private $initialized; /** * * @var SearchEngineOptions */ - private $options; - private $app; - private $queries = array(); - private $arrayq = array(); - private $colls = array(); - private $qp = array(); - private $needthesaurus = array(); - private $configurationPanel; - private $resetCacheNextQuery = false; + protected $options; + protected $queries = array(); + protected $arrayq = array(); + protected $colls = array(); + protected $qp = array(); + protected $needthesaurus = array(); + protected $configurationPanel; + protected $resetCacheNextQuery = false; /** * {@inheritdoc} */ - public function __construct(Application $app) + public function __construct() { - $this->app = $app; $this->options = new SearchEngineOptions(); } - public function initialize() - { - if ($this->initialized) { - return $this; - } - - $choosenConnexion = $this->app['phraseanet.configuration']->getPhraseanet()->get('database'); - - $connexion = $this->app['phraseanet.configuration']->getConnexion($choosenConnexion); - - $hostname = $connexion->get('host'); - $port = (int) $connexion->get('port'); - $user = $connexion->get('user'); - $password = $connexion->get('password'); - $dbname = $connexion->get('dbname'); - - if (!extension_loaded('phrasea2')) { - throw new RuntimeException('Phrasea extension is required'); - } - - if (!function_exists('phrasea_conn')) { - throw new RuntimeException('Phrasea extension requires upgrade'); - } - - if (phrasea_conn($hostname, $port, $user, $password, $dbname) !== true) { - throw new RuntimeException('Unable to initialize Phrasea connection'); - } - - $this->initialized = true; - - return $this; - } - - private function checkSession() - { - if (!$this->app['phraseanet.user']) { - throw new \RuntimeException('Phrasea currently support only authenticated queries'); - } - - if (!phrasea_open_session($this->app['session']->get('phrasea_session_id'), $this->app['phraseanet.user']->get_id())) { - if (!$ses_id = phrasea_create_session((string) $this->app['phraseanet.user']->get_id())) { - throw new \Exception_InternalServerError('Unable to create phrasea session'); - } - $this->app['session']->set('phrasea_session_id', $ses_id); - } - } - /** * {@inheritdoc} */ @@ -104,15 +55,26 @@ class PhraseaEngine implements SearchEngineInterface return $status; } - public function configurationPanel() + public function getConfigurationPanel(Application $app, Request $request) { - if (!$this->configurationPanel) { + + } + + public function postConfigurationPanel(Application $app, Request $request) + { + + } + + private function configurationPanel() + { + if ( ! $this->configurationPanel) { $this->configurationPanel = new ConfigurationPanel($this); } return $this->configurationPanel; } + /** * {@inheritdoc} */ @@ -161,7 +123,7 @@ class PhraseaEngine implements SearchEngineInterface */ public function updateRecord(\record_adapter $record) { - $record->set_binary_status(\databox_status::dec2bin($this->app, bindec($record->get_status()) & ~7 | 4)); + $record->set_binary_status(\databox_status::dec2bin(bindec($record->get_status()) & ~7 | 4)); return $this; } @@ -235,9 +197,6 @@ class PhraseaEngine implements SearchEngineInterface */ public function query($query, $offset, $perPage) { - $this->initialize(); - $this->checkSession(); - assert(is_int($offset)); assert($offset >= 0); assert(is_int($perPage)); @@ -250,9 +209,12 @@ class PhraseaEngine implements SearchEngineInterface $query .= ' AND recordtype=' . $this->options->getRecordType(); } + $appbox = \appbox::get_instance(\bootstrap::getCore()); + $session = $appbox->get_session(); + $sql = 'SELECT query, query_time, duration, total FROM cache WHERE session_id = :ses_id'; - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); - $stmt->execute(array(':ses_id' => $this->app['session']->get('phrasea_session_id'))); + $stmt = $appbox->get_connection()->prepare($sql); + $stmt->execute(array(':ses_id' => $session->get_ses_id())); $row = $stmt->fetch(\PDO::FETCH_ASSOC); $stmt->closeCursor(); @@ -267,24 +229,24 @@ class PhraseaEngine implements SearchEngineInterface } if ($this->resetCacheNextQuery === true) { - phrasea_clear_cache($this->app['session']->get('phrasea_session_id')); + phrasea_clear_cache($session->get_ses_id()); $this->addQuery($query); $this->executeQuery($query); $sql = 'SELECT query, query_time, duration, total FROM cache WHERE session_id = :ses_id'; - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); - $stmt->execute(array(':ses_id' => $this->app['session']->get('phrasea_session_id'))); + $stmt = $appbox->get_connection()->prepare($sql); + $stmt->execute(array(':ses_id' => $session->get_ses_id())); $row = $stmt->fetch(\PDO::FETCH_ASSOC); $stmt->closeCursor(); } else { /** * @todo clean this in DB */ - $this->total_available = $this->total_results = $this->app['session']->get('phrasea_engine_n_results'); + $this->total_available = $this->total_results = $session->get_session_prefs('phrasea_engine_n_results'); } $res = phrasea_fetch_results( - $this->app['session']->get('phrasea_session_id'), $offset + 1, $perPage, false + $session->get_ses_id(), $offset + 1, $perPage, false ); $rs = array(); @@ -301,31 +263,29 @@ class PhraseaEngine implements SearchEngineInterface foreach ($rs as $data) { try { $records->add(new \record_adapter( - $this->app, - \phrasea::sbasFromBas($this->app, $data['base_id']), + \phrasea::sbasFromBas($data['base_id']), $data['record_id'], $resultNumber )); } catch (Exception $e) { } - $resultNumber++; + $resultNumber ++; } return new SearchEngineResult($records, $query, $row['duration'], $offset, $row['total'], $row['total'], $error, '', new ArrayCollection(), new ArrayCollection(), ''); } - public static function create(Application $app) - { - return new static($app); - } - /** * {@inheritdoc} */ private function executeQuery($query) { + $appbox = \appbox::get_instance(\bootstrap::getCore()); + $session = $appbox->get_session(); + $registry = $appbox->get_registry(); + $dateLog = date("Y-m-d H:i:s"); $nbanswers = $total_time = 0; $sort = ''; @@ -351,12 +311,12 @@ class PhraseaEngine implements SearchEngineInterface } $results = phrasea_query2( - $this->app['session']->get('phrasea_session_id') + $session->get_ses_id() , $sbas_id , $this->colls[$sbas_id] , $this->arrayq[$sbas_id] - , $this->app['phraseanet.registry']->get('GV_sit') - , $this->app['session']->get('usr_id') + , $registry->get('GV_sit') + , (string) $session->get_usr_id() , false , $this->options->searchType() == SearchEngineOptions::RECORD_GROUPING ? PHRASEA_MULTIDOC_REGONLY : PHRASEA_MULTIDOC_DOCONLY , $sort @@ -368,26 +328,26 @@ class PhraseaEngine implements SearchEngineInterface $nbanswers += $results["nbanswers"]; } -// $logger = $session->get_logger($this->appbox->get_databox($sbas_id)); -// -// $conn2 = \connection::getPDOConnection($sbas_id); -// -// $sql3 = "INSERT INTO log_search -// (id, log_id, date, search, results, coll_id ) -// VALUES -// (null, :log_id, :date, :query, :nbresults, :colls)"; -// -// $params = array( -// ':log_id' => $logger->get_id() -// , ':date' => $dateLog -// , ':query' => $query -// , ':nbresults' => $results["nbanswers"] -// , ':colls' => implode(',', $this->colls[$sbas_id]) -// ); -// -// $stmt = $conn2->prepare($sql3); -// $stmt->execute($params); -// $stmt->closeCursor(); + $logger = $session->get_logger($appbox->get_databox($sbas_id)); + + $conn2 = \connection::getPDOConnection($sbas_id); + + $sql3 = "INSERT INTO log_search + (id, log_id, date, search, results, coll_id ) + VALUES + (null, :log_id, :date, :query, :nbresults, :colls)"; + + $params = array( + ':log_id' => $logger->get_id() + , ':date' => $dateLog + , ':query' => $query + , ':nbresults' => $results["nbanswers"] + , ':colls' => implode(',', $this->colls[$sbas_id]) + ); + + $stmt = $conn2->prepare($sql3); + $stmt->execute($params); + $stmt->closeCursor(); } $sql = 'UPDATE cache @@ -396,18 +356,16 @@ class PhraseaEngine implements SearchEngineInterface $params = array( 'query' => $query, - ':ses_id' => $this->app['session']->get('phrasea_session_id'), + ':ses_id' => $session->get_ses_id(), ':duration' => $total_time, ':total' => $nbanswers, ); - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); + $stmt = $appbox->get_connection()->prepare($sql); $stmt->execute($params); $stmt->closeCursor(); - if ($this->app['phraseanet.user']) { - \User_Adapter::saveQuery($this->app, $query); - } + \User_Adapter::saveQuery($query); return $this; } @@ -427,17 +385,19 @@ class PhraseaEngine implements SearchEngineInterface { $ret = array(); + $appbox = \appbox::get_instance(\bootstrap::getCore()); + $session = $appbox->get_session(); $res = phrasea_fetch_results( - $this->app['session']->get('phrasea_session_id'), ($record->get_number() + 1), 1, true, "[[em]]", "[[/em]]" + $session->get_ses_id(), ($record->get_number() + 1), 1, true, "[[em]]", "[[/em]]" ); - if (!isset($res['results']) || !is_array($res['results'])) { + if ( ! isset($res['results']) || ! is_array($res['results'])) { return array(); } $rs = $res['results']; $res = array_shift($rs); - if (!isset($res['xml'])) { + if ( ! isset($res['xml'])) { return array(); } @@ -484,8 +444,8 @@ class PhraseaEngine implements SearchEngineInterface if ($status) { $requestStat = 'xxxx'; - for ($i = 4; ($i <= 32); $i++) { - if (!isset($status[$i])) { + for ($i = 4; ($i <= 64); $i ++ ) { + if ( ! isset($status[$i])) { $requestStat = 'x' . $requestStat; continue; } @@ -507,9 +467,7 @@ class PhraseaEngine implements SearchEngineInterface } } if ($this->options->fields()) { - $this->queries[$sbas] .= ' IN (' . implode(' OR ', array_map(function(\databox_field $field) { - return $field->get_name(); - }, $this->options->fields())) . ')'; + $this->queries[$sbas] .= ' IN (' . implode(' OR ', $this->options->fields()) . ')'; } if (($this->options->getMinDate() || $this->options->getMaxDate()) && $this->options->getDateFields()) { if ($this->options->getMinDate()) { @@ -568,7 +526,7 @@ class PhraseaEngine implements SearchEngineInterface private function singleParse($sbas, $query) { - $this->qp[$sbas] = new PhraseaEngineQueryParser($this->app, $this->options->getLocale()); + $this->qp[$sbas] = new PhraseaEngineQueryParser($this->options->getLocale()); $this->qp[$sbas]->debug = false; if ($sbas == 'main') { @@ -586,39 +544,5 @@ class PhraseaEngine implements SearchEngineInterface return $this; } - - /** - * @inheritdoc - */ - public function clearCache() - { - if ($this->app['session']->has('phrasea_session_id')) { - $this->initialize(); - phrasea_close_session($this->app['session']->get('phrasea_session_id')); - $this->app['session']->remove('phrasea_session_id'); - } - } - - /** - * @inheritdoc - */ - public function clearAllCache(\DateTime $date = null) - { - if (!$date) { - $date = new \DateTime(); - } - - $sql = "SELECT session_id FROM cache WHERE lastaccess <= :date"; - - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); - $stmt->execute(array(':date' => $date->format(DATE_ISO8601))); - $rs = $stmt->fetchAll(\PDO::FETCH_ASSOC); - $stmt->closeCursor(); - - foreach ($rs as $row) { - phrasea_close_session($row['session_id']); - } - - return $this; - } } + diff --git a/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngineQueryParser.php b/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngineQueryParser.php index 75fb130fdb..322656c15d 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngineQueryParser.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Phrasea/PhraseaEngineQueryParser.php @@ -11,8 +11,6 @@ namespace Alchemy\Phrasea\SearchEngine\Phrasea; -use Alchemy\Phrasea\Application; - /** * * @package searchEngine @@ -86,17 +84,15 @@ class PhraseaEngineQueryParser */ public $proposals = Array("QRY" => "", "BASES" => array(), "QUERIES" => array()); - public $app; /** * Current language for thesaurus * @var */ - public $lng; + public $lng = null; protected $unicode; - public function __construct(Application $app, $lng = "???") + public function __construct($lng = "???") { - $this->app = $app; $this->lng = $lng; $this->unicode = new \unicode(); @@ -116,7 +112,7 @@ class PhraseaEngineQueryParser public function parsequery($phq) { if ($this->debug) { - for ($i = 0; $i < mb_strlen($phq, 'UTF-8'); $i++) { + for ($i = 0; $i < mb_strlen($phq, 'UTF-8'); $i ++ ) { $c = mb_substr($phq, $i, 1, 'UTF-8'); printf("// %s : '%s' (%d octets)\n", $i, $c, strlen($c)); } @@ -353,7 +349,7 @@ class PhraseaEngineQueryParser public function priority_opk(&$tree, $depth = 0) { - if (!$tree) { + if ( ! $tree) { return; } @@ -373,7 +369,7 @@ class PhraseaEngineQueryParser public function distrib_opk(&$tree, $depth = 0) { - if (!$tree) { + if ( ! $tree) { return; } @@ -400,7 +396,7 @@ class PhraseaEngineQueryParser public function thesaurus2_apply(&$tree, $bid) { - if (!$tree) { + if ( ! $tree) { return; } @@ -459,7 +455,7 @@ class PhraseaEngineQueryParser { if ($depth == 0) $ret = $tree; - if (!$useThesaurus) { + if ( ! $useThesaurus) { return; // full-text only : inchangé } @@ -475,7 +471,7 @@ class PhraseaEngineQueryParser if (isset($tree["RB"]["CONTEXT"])) $copy["CONTEXT"] = $tree["CONTEXT"] = $tree["RB"]["CONTEXT"]; else - if (!$keepfuzzy) + if ( ! $keepfuzzy) $copy["CONTEXT"] = $tree["CONTEXT"] = "*"; $copy["RB"]["SREF"] = &$tree["RB"]; @@ -536,7 +532,7 @@ class PhraseaEngineQueryParser if ($context !== null) $tmp["RB"]["CONTEXT"] = $context; else - if (!$keepfuzzy) + if ( ! $keepfuzzy) $tmp["RB"]["CONTEXT"] = "*"; // corrige les profondeurs des 2 copies du 'simple' d'origine $tmp["LB"]["DEPTH"] += 1; @@ -563,7 +559,7 @@ class PhraseaEngineQueryParser if ($context !== null) $tmp["CONTEXT"] = $context; else - if (!$keepfuzzy) + if ( ! $keepfuzzy) $tmp["CONTEXT"] = "*"; // corrige la profondeur de la copie du 'simple' d'origine $tmp["RB"]["DEPTH"] += 1; @@ -583,7 +579,7 @@ class PhraseaEngineQueryParser if ($depth == 0) $this->proposals["BASES"]["b$bid"] = array("BID" => $bid, "NAME" => $name, "TERMS" => array()); - if (!$tree) { + if ( ! $tree) { return(0); } @@ -608,7 +604,7 @@ class PhraseaEngineQueryParser for ($n = $node->firstChild; $n; $n = $n->nextSibling) { if ($n->nodeName == "sy") { $lng = $n->getAttribute("lng"); - if (!array_key_exists($lng, $tsy)) + if ( ! array_key_exists($lng, $tsy)) $tsy[$lng] = array(); $zsy = array("v" => $n->getAttribute("v"), "w" => $n->getAttribute("w"), "k" => $n->getAttribute("k")); @@ -625,15 +621,15 @@ class PhraseaEngineQueryParser foreach ($tsy as $lng => $tsy2) { foreach ($tsy2 as $sy) { $alt .= $alt ? "\n" : ""; - $alt .= "" . $lng . ": " . \p4string::MakeString($sy["v"], "js"); + $alt .= "" . $lng . ": " . p4string::MakeString($sy["v"], "js"); } } $this->proposals['QUERIES'][$syfound["w"]] = $syfound["w"]; $thtml = $syfound["v"]; - $kjs = $syfound["k"] ? ("'" . \p4string::MakeString($syfound["k"], "js") . "'") : "null"; - $wjs = "'" . \p4string::MakeString($syfound["w"], "js") . "'"; + $kjs = $syfound["k"] ? ("'" . p4string::MakeString($syfound["k"], "js") . "'") : "null"; + $wjs = "'" . p4string::MakeString($syfound["w"], "js") . "'"; if ($node->getAttribute("term")) { $thtml = "" . $thtml . ""; @@ -660,7 +656,7 @@ class PhraseaEngineQueryParser } } $n->removeAttribute("marked"); - for ($i = 0; array_key_exists($syfound . $i, $tsort) && $i < 9999; $i++) + for ($i = 0; array_key_exists($syfound . $i, $tsort) && $i < 9999; $i ++ ) ; $tsort[$syfound . $i] = $n; } @@ -733,7 +729,7 @@ class PhraseaEngineQueryParser $t = $w = implode(" ", $w); if (isset($tree["CONTEXT"])) { - if (!$tree["CONTEXT"]) { + if ( ! $tree["CONTEXT"]) { $x0 = "@w=\"" . $w . "\" and not(@k)"; } else { if ($tree["CONTEXT"] == "*") { @@ -752,10 +748,10 @@ class PhraseaEngineQueryParser if ($this->debug) printf("searching thesaurus with xpath='%s'
\n", $x); - $dxp = new \DOMXPath($domthe); + $dxp = new DOMXPath($domthe); $nodes = $dxp->query($x); - if (!isset($tree["RB"]["SREF"]["TIDS"])) + if ( ! isset($tree["RB"]["SREF"]["TIDS"])) $tree["RB"]["SREF"]["TIDS"] = array(); if ($nodes->length >= 1) { if ($nodes->length == 1) { @@ -766,8 +762,8 @@ class PhraseaEngineQueryParser // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle meme la syntaxe car la value finira par '$') $val = ""; foreach ($nodes as $node) { - if (!isset($tree["CONTEXT"])) - $ambigus++; + if ( ! isset($tree["CONTEXT"])) + $ambigus ++; $this->addtoTIDS($tree["RB"], $bid, $node); } } @@ -777,15 +773,74 @@ class PhraseaEngineQueryParser $this->proposals["BASES"]["b$bid"]["TERMS"][$path]["HTML"] = $prophtml; } else { // le mot n'est pas dans le thesaurus - $tree = null; } return($ambigus); } + /* + function dead_setTids(&$tree, &$simple, $bid, &$domthe, $searchsynonyms) + { + // if($this->debug) + print("setTids:\n\$tree=" . var_export($tree, true) . "\n"); + + $ambigus = 0; + if(is_array($w = $simple["VALUE"])) + $t = $w = implode(" ", $w); + + if (isset($tree["CONTEXT"])) { + if (!$tree["CONTEXT"]) { + $x0 = "@w=\"" . $w ."\" and not(@k)"; + } else { + if ($tree["CONTEXT"]=="*") { + $x0 = "@w=\"" . $w ."\""; + } else { + $x0 = "@w=\"" . $w ."\" and @k=\"" . $tree["CONTEXT"] . "\""; + $t .= " (" . $tree["CONTEXT"] . ")"; + } + } + } else { + $x0 = "@w=\"" . $w ."\""; + } + + $x = "/thesaurus//sy[" . $x0 ."]"; + + if($this->debug) + printf("searching thesaurus with xpath='%s'
\n", $x); + + $dxp = new DOMXPath($domthe); + $nodes = $dxp->query($x); + + if(!isset($tree["RB"]["SREF"]["TIDS"])) + $tree["RB"]["SREF"]["TIDS"] = array(); + if ($nodes->length >= 1) { + if ($nodes->length == 1) { + // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finira par '%') + $this->addtoTIDS($tree["RB"], $bid, $nodes->item(0)); + // $this->thesaurusDOMNodes[] = $nodes->item(0); + } else { + // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle meme la syntaxe car la value finira par '$') + $val = ""; + foreach ($nodes as $node) { + if(!isset($tree["CONTEXT"])) + $ambigus++; + $this->addtoTIDS($tree["RB"], $bid, $node); + } + } + $path = $tree["RB"]["SREF"]["PATH"]; + $prophtml = ""; + $this->propAsHTML($domthe->documentElement, $prophtml, $path); + $this->proposals["TERMS"][$path]["HTML"] = $prophtml; + } else { + // le mot n'est pas dans le thesaurus + } + + return($ambigus); + } + */ public function containsColonOperator(&$tree) { - if (!$tree) { + if ( ! $tree) { return(false); } if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE")) { @@ -813,7 +868,7 @@ class PhraseaEngineQueryParser if ($this->debug) printf("found node id='%s', v='%s' w='%s', k='%s', p='%s' for node-path=%s \n", $id, $DOMnode->getAttribute("v"), $w, $k, $p, $path); - if (!$k) + if ( ! $k) $k = null; $found = false; @@ -832,11 +887,11 @@ class PhraseaEngineQueryParser // } } } - if (!$found) + if ( ! $found) $extendednode["SREF"]["TIDS"][] = array("bid" => $bid, "pid" => $pid, "id" => $id, "w" => $w, "k" => $k, "lng" => $lng, "p" => $p); // on liste les propositions de thésaurus pour ce node (dans l'arbre simple) - if (!isset($this->proposals["BASES"]["b$bid"]["TERMS"][$path])) { + if ( ! isset($this->proposals["BASES"]["b$bid"]["TERMS"][$path])) { // $this->proposals["TERMS"][$path] = array("TERM"=>implode(" ", $extendednode["VALUE"]), "PROPOSALS"=>array()); $term = implode(" ", $extendednode["VALUE"]); if (isset($extendednode["CONTEXT"]) && $extendednode["CONTEXT"]) { @@ -857,7 +912,7 @@ class PhraseaEngineQueryParser // puis par remonter au père for ($node = $DOMnode->parentNode; $node && $node->nodeType == XML_ELEMENT_NODE && $node->parentNode; $node = $node->parentNode) { $id = $node->getAttribute("id"); - if (!$id) + if ( ! $id) break; // on a dépassé la racine du thésaurus $node->setAttribute("marked", "1"); } @@ -880,7 +935,7 @@ class PhraseaEngineQueryParser $txt .= $tab . "\t\"" . $w . ""; $txt .= $tab . "\t\""; $txt .= $tab . "\n"; - $ambiguites["n"]++; + $ambiguites["n"] ++; } else { if (isset($tree["CONTEXT"])) $w .= "[" . $tree["CONTEXT"] . "]"; @@ -915,7 +970,7 @@ class PhraseaEngineQueryParser public function get_ambigu(&$tree, $mouseCallback = "void", $depth = 0) { - if (!$tree) { + if ( ! $tree) { return(""); } @@ -937,22 +992,22 @@ class PhraseaEngineQueryParser public function set_default(&$tree, &$emptyw, $depth = 0) { - if (!$tree) { + if ( ! $tree) { return(true); } if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { if ($tree["CLASS"] == "OPS") { - if (!$this->set_default($tree["LB"], $emptyw, $depth + 1)) { + if ( ! $this->set_default($tree["LB"], $emptyw, $depth + 1)) { return(false); } - if (!$this->set_default($tree["RB"], $emptyw, $depth + 1)) { + if ( ! $this->set_default($tree["RB"], $emptyw, $depth + 1)) { return(false); } } else { // OPK ! // jy 20041223 : ne pas appliquer d'op. par def. derriere un op arith. // ex : "d < 1/2/2003" : grouper la liste "1","2","2004" en "mot" unique - if (!$tree["LB"] || ($tree["LB"]["CLASS"] != "SIMPLE" && $tree["LB"]["CLASS"] != "QSIMPLE") || (is_array($tree["LB"]["VALUE"]) && count($tree["LB"]["VALUE"]) != 1)) { + if ( ! $tree["LB"] || ($tree["LB"]["CLASS"] != "SIMPLE" && $tree["LB"]["CLASS"] != "QSIMPLE") || (is_array($tree["LB"]["VALUE"]) && count($tree["LB"]["VALUE"]) != 1)) { // un op. arith. doit étre précédé d'un seul nom de champ if ($this->errmsg != "") $this->errmsg .= sprintf("\\n"); @@ -960,7 +1015,7 @@ class PhraseaEngineQueryParser return(false); } - if (!$tree["RB"] || ($tree["RB"]["CLASS"] != "SIMPLE" && $tree["RB"]["CLASS"] != "QSIMPLE")) { + if ( ! $tree["RB"] || ($tree["RB"]["CLASS"] != "SIMPLE" && $tree["RB"]["CLASS"] != "QSIMPLE")) { // un op. arith. doit étre suivi d'une valeur if ($this->errmsg != "") $this->errmsg .= sprintf("\\n"); @@ -979,21 +1034,21 @@ class PhraseaEngineQueryParser /** gestion des branches null * a revoir car ca ppete pas d'erreur mais corrige automatiquement * ** */ - if (!isset($tree["RB"])) + if ( ! isset($tree["RB"])) $tree = $tree["LB"]; else - if (!isset($tree["LB"])) + if ( ! isset($tree["LB"])) $tree = $tree["RB"]; } else { if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE")) { if (is_array($tree["VALUE"])) { $treetmp = null; $pnum = 0; - for ($i = 0; $i < count($tree["VALUE"]); $i++) { + for ($i = 0; $i < count($tree["VALUE"]); $i ++ ) { // gestion mot vide if (isset($emptyw[$tree["VALUE"][$i]]) || $tree["VALUE"][$i] == "?" || $tree["VALUE"][$i] == "*") { // on a forcé les '?' ou '*' isolés comme des mots vides - $pnum++; + $pnum ++; } else { if ($treetmp == null) { $treetmp = array("CLASS" => $tree["CLASS"], @@ -1052,7 +1107,7 @@ class PhraseaEngineQueryParser unset($tree["LB"]); unset($tree["RB"]); unset($tree["PNUM"]); - $nmodif++; + $nmodif ++; } else { $nmodif += $this->factor_or2($tree["LB"], $depth + 1); $nmodif += $this->factor_or2($tree["RB"], $depth + 1); @@ -1166,7 +1221,7 @@ class PhraseaEngineQueryParser } $tmp = $onedate; - if (!is_array($tmp)) + if ( ! is_array($tmp)) $tmp = explode(" ", $tmp); switch (sizeof($tmp)) { @@ -1420,7 +1475,7 @@ class PhraseaEngineQueryParser if ($inquote) { // quand on est entre guillements les tokens perdent leur signification $tree = $this->addtotree($tree, $t, $depth, $inquote); - if (!$tree) { + if ( ! $tree) { return(null); } } else { @@ -1439,16 +1494,16 @@ class PhraseaEngineQueryParser if ($inquote) { // quand on est entre guillements les tokens perdent leur signification $tree = $this->addtotree($tree, $t, $depth, $inquote); - if (!$tree) { + if ( ! $tree) { return(null); } } else { // '(' : appel récursif - if (!$tree) + if ( ! $tree) $tree = $this->maketree($depth + 1); else { if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { $tree["RB"] = $this->maketree($depth + 1); - if (!$tree["RB"]) + if ( ! $tree["RB"]) $tree = null; } else { // ici on applique l'opérateur par défaut @@ -1461,7 +1516,7 @@ class PhraseaEngineQueryParser "RB" => $this->maketree($depth + 1)); } } - if (!$tree) { + if ( ! $tree) { return(null); } } @@ -1490,12 +1545,12 @@ class PhraseaEngineQueryParser print("OPENING QUOTE!
"); } // ouverture des guillemets -> récursivité - if (!$tree) + if ( ! $tree) $tree = $this->maketree($depth + 1, true); else { if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { $tree["RB"] = $this->maketree($depth + 1, true); - if (!$tree["RB"]) + if ( ! $tree["RB"]) $tree = null; } else { // ici on applique l'opérateur par défaut @@ -1508,7 +1563,7 @@ class PhraseaEngineQueryParser "RB" => $this->maketree($depth + 1, true)); } } - if (!$tree) { + if ( ! $tree) { return(null); } } @@ -1520,7 +1575,7 @@ class PhraseaEngineQueryParser var_dump($tree); print("-------------------------\n"); } - if (!$tree) { + if ( ! $tree) { return(null); } break; @@ -1545,7 +1600,7 @@ class PhraseaEngineQueryParser print("-------------------------\n"); } - if (!$t) { + if ( ! $t) { return($tree); } @@ -1559,7 +1614,7 @@ class PhraseaEngineQueryParser // un [xxx] suit un terme : il introduit un contexte $tree["CONTEXT"] = $t["VALUE"]; } elseif ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if (!isset($tree["RB"]) || !$tree["RB"]) { + if ( ! isset($tree["RB"]) || ! $tree["RB"]) { // un [xxx] peut suivre un opérateur, c'est un paramétre normalement numérique $tree["PNUM"] = $t["VALUE"]; } else { @@ -1586,7 +1641,7 @@ class PhraseaEngineQueryParser break; case "TOK_CMP": // < > <= >= <> = : sont des opérateurs de comparaison - if (!$tree) { + if ( ! $tree) { // printf("\nUne question ne peut commencer par '" . $t["VALUE"] . "'
"); if ($this->errmsg != "") $this->errmsg .= "\\n"; @@ -1606,9 +1661,9 @@ class PhraseaEngineQueryParser return(array("CLASS" => "OPK", "VALUE" => $t["VALUE"], "NODETYPE" => $this->opk[$t["VALUE"]]["NODETYPE"], "PNUM" => null, "DEPTH" => $depth, "LB" => $tree, "RB" => null)); break; case "TOK_WORD": - if ($t["CLASS"] == "TOK_WORD" && isset($this->ops[$t["VALUE"]]) && !$inquote) { + if ($t["CLASS"] == "TOK_WORD" && isset($this->ops[$t["VALUE"]]) && ! $inquote) { // ce mot est un opérateur phrasea - if (!$tree) { + if ( ! $tree) { // printf("\n581 : Une question ne peut commencer par un opérateur
"); if ($this->errmsg != "") $this->errmsg .= "\\n"; @@ -1641,7 +1696,7 @@ class PhraseaEngineQueryParser // ce mot n'est pas un opérateur $pnum = null; $nodetype = PHRASEA_KEYLIST; - if ($t["CLASS"] == "TOK_WORD" && isset($this->spw[$t["VALUE"]]) && !$inquote) { + if ($t["CLASS"] == "TOK_WORD" && isset($this->spw[$t["VALUE"]]) && ! $inquote) { // mais c'est un mot 'spécial' de phrasea ('last', 'all') $type = $this->spw[$t["VALUE"]]["CLASS"]; $nodetype = $this->spw[$t["VALUE"]]["NODETYPE"]; @@ -1668,24 +1723,25 @@ class PhraseaEngineQueryParser public function addsimple($t, $type, $nodetype, $pnum, $tree, $depth) { $nok = 0; + $registry = \registry::get_instance(); $w = $t["VALUE"]; if ($w != "?" && $w != "*") { // on laisse passer les 'isolés' pour les traiter plus tard comme des mots vides - for ($i = 0; $i < strlen($w); $i++) { + for ($i = 0; $i < strlen($w); $i ++ ) { $c = substr($w, $i, 1); if ($c == "?" || $c == "*") { - if ($nok < $this->app['phraseanet.registry']->get('GV_min_letters_truncation')) { + if ($nok < $registry->get('GV_min_letters_truncation')) { if ($this->errmsg != "") $this->errmsg .= sprintf("\\n"); - $this->errmsg .= _('qparser:: Formulation incorrecte, necessite plus de caractere : ') . "
" . $this->app['phraseanet.registry']->get('GV_min_letters_truncation'); + $this->errmsg .= _('qparser:: Formulation incorrecte, necessite plus de caractere : ') . "
" . $registry->get('GV_min_letters_truncation'); return(null); } // $nok = 0; } else - $nok++; + $nok ++; } } - if (!$tree) { + if ( ! $tree) { return(array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth)); } switch ($tree["CLASS"]) { @@ -1870,8 +1926,8 @@ class PhraseaEngineQueryParser $l = mb_strlen($this->phq, 'UTF-8'); $t = ""; $c_utf8 = ""; - for ($i = 0; $i < $l; $i++) { - if (!$this->unicode->has_indexer_bad_char(($c_utf8 = mb_substr($this->phq, $i, 1, 'UTF-8')))) { + for ($i = 0; $i < $l; $i ++ ) { + if ( ! $this->unicode->has_indexer_bad_char(($c_utf8 = mb_substr($this->phq, $i, 1, 'UTF-8')))) { // $c = mb_strtolower($c); // $t .= isset($this->noaccent[$c]) ? $this->noaccent[$c] : $c; $t .= $this->unicode->remove_diacritics(mb_strtolower($c_utf8)); diff --git a/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngine.php b/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngine.php deleted file mode 100644 index bdc8644aff..0000000000 --- a/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngine.php +++ /dev/null @@ -1,517 +0,0 @@ -options = new SearchEngineOptions(); - } - - /** - * {@inheritdoc} - */ - public function status() - { - return true; - } - - /** - * {@inheritdoc} - */ - public function availableTypes() - { - return array(self::GEM_TYPE_RECORD, self::GEM_TYPE_STORY); - } - - /** - * {@inheritdoc} - */ - public function addRecord(\record_adapter $record) - { - return $this->updateRecord($record); - } - - /** - * {@inheritdoc} - */ - public function removeRecord(\record_adapter $record) - { - $connbas = $record->get_databox()->get_connection(); - - $sql = "DELETE FROM prop WHERE record_id = :record_id"; - $stmt = $connbas->prepare($sql); - $stmt->execute(array(':record_id' => $record->get_record_id())); - $stmt->closeCursor(); - - $sql = "DELETE FROM idx WHERE record_id = :record_id"; - $stmt = $connbas->prepare($sql); - $stmt->execute(array(':record_id' => $record->get_record_id())); - $stmt->closeCursor(); - - $sql = "DELETE FROM thit WHERE record_id = :record_id"; - $stmt = $connbas->prepare($sql); - $stmt->execute(array(':record_id' => $record->get_record_id())); - $stmt->closeCursor(); - - unset($stmt, $connbas); - - return $this; - } - - /** - * {@inheritdoc} - */ - public function updateRecord(\record_adapter $record) - { - $record->set_binary_status(\databox_status::dec2bin(bindec($record->get_status()) & ~7 | 4)); - - return $this; - } - - /** - * {@inheritdoc} - */ - public function addStory(\record_adapter $record) - { - return $this->updateRecord($record); - } - - /** - * {@inheritdoc} - */ - public function removeStory(\record_adapter $record) - { - return $this->removeRecord($record); - } - - /** - * {@inheritdoc} - */ - public function updateStory(\record_adapter $record) - { - return $this->updateRecord($record); - } - - /** - * {@inheritdoc} - */ - public function addFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Phrasea Engine'); - } - - /** - * {@inheritdoc} - */ - public function removeFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Phrasea Engine'); - } - - /** - * {@inheritdoc} - */ - public function updateFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Phrasea Engine'); - } - - /** - * {@inheritdoc} - */ - public function setOptions(SearchEngineOptions $options) - { - $this->options = $options; - } - - /** - * {@inheritdoc} - */ - public function resetOptions() - { - $this->options = new SearchEngineOptions(); - } - - /** - * {@inheritdoc} - */ - public function query($query, $offset, $perPage) - { - assert(is_int($offset)); - assert($offset >= 0); - assert(is_int($perPage)); - - if (trim($query) === '') { - $query = "all"; - } - - if ($this->options->getRecordType()) { - $query .= ' AND recordtype=' . $this->options->getRecordType(); - } - - $appbox = \appbox::get_instance(\bootstrap::getCore()); - $session = $appbox->get_session(); - - $sql = 'SELECT query, query_time, duration, total FROM cache WHERE session_id = :ses_id'; - $stmt = $appbox->get_connection()->prepare($sql); - $stmt->execute(array(':ses_id' => $session->get_ses_id())); - $row = $stmt->fetch(\PDO::FETCH_ASSOC); - $stmt->closeCursor(); - - $date_obj = new \DateTime('-10 min'); - $date_quest = new \DateTime($row['query_time']); - - if ($query != $row['query']) { - $this->resetCacheNextQuery = true; - } - if ($date_obj > $date_quest) { - $this->resetCacheNextQuery = true; - } - - if ($this->resetCacheNextQuery === true) { - phrasea_clear_cache($session->get_ses_id()); - $this->addQuery($query); - $this->executeQuery($query); - - $sql = 'SELECT query, query_time, duration, total FROM cache WHERE session_id = :ses_id'; - $stmt = $appbox->get_connection()->prepare($sql); - $stmt->execute(array(':ses_id' => $session->get_ses_id())); - $row = $stmt->fetch(\PDO::FETCH_ASSOC); - $stmt->closeCursor(); - } else { - /** - * @todo clean this in DB - */ - $this->total_available = $this->total_results = $session->get_session_prefs('phrasea_engine_n_results'); - } - - $res = phrasea_fetch_results( - $session->get_ses_id(), $offset + 1, $perPage, false - ); - - $rs = array(); - $error = _('Unable to execute query'); - - if (isset($res['results']) && is_array($res['results'])) { - $rs = $res['results']; - $error = ''; - } - - $resultNumber = $offset; - $records = new ArrayCollection(); - - foreach ($rs as $data) { - try { - $records->add(new \record_adapter( - \phrasea::sbasFromBas($data['base_id']), - $data['record_id'], - $resultNumber - )); - } catch (Exception $e) { - - } - $resultNumber ++; - } - - - return new SearchEngineResult($records, $query, $row['duration'], $offset, $row['total'], $row['total'], $error, '', new ArrayCollection(), new ArrayCollection(), ''); - } - - /** - * {@inheritdoc} - */ - private function executeQuery($query) - { - $appbox = \appbox::get_instance(\bootstrap::getCore()); - $session = $appbox->get_session(); - $registry = $appbox->get_registry(); - - $dateLog = date("Y-m-d H:i:s"); - $nbanswers = $total_time = 0; - $sort = ''; - - if ($this->options->sortBy()) { - switch ($this->options->sortOrder()) { - case SearchEngineOptions::SORT_MODE_ASC: - $sort = '+'; - break; - case SearchEngineOptions::SORT_MODE_DESC: - default: - $sort = '-'; - break; - } - $sort .= '0' . $this->options->sortBy(); - } - - foreach ($this->queries as $sbas_id => $qry) { - $BF = array(); - - foreach ($this->options->businessFieldsOn() as $collection) { - $BF[] = $collection->get_base_id(); - } - - $results = phrasea_query2( - $session->get_ses_id() - , $sbas_id - , $this->colls[$sbas_id] - , $this->arrayq[$sbas_id] - , $registry->get('GV_sit') - , (string) $session->get_usr_id() - , false - , $this->options->searchType() == SearchEngineOptions::RECORD_GROUPING ? PHRASEA_MULTIDOC_REGONLY : PHRASEA_MULTIDOC_DOCONLY - , $sort - , $BF - ); - - if ($results) { - $total_time += $results['time_all']; - $nbanswers += $results["nbanswers"]; - } - - $logger = $session->get_logger($appbox->get_databox($sbas_id)); - - $conn2 = \connection::getPDOConnection($sbas_id); - - $sql3 = "INSERT INTO log_search - (id, log_id, date, search, results, coll_id ) - VALUES - (null, :log_id, :date, :query, :nbresults, :colls)"; - - $params = array( - ':log_id' => $logger->get_id() - , ':date' => $dateLog - , ':query' => $query - , ':nbresults' => $results["nbanswers"] - , ':colls' => implode(',', $this->colls[$sbas_id]) - ); - - $stmt = $conn2->prepare($sql3); - $stmt->execute($params); - $stmt->closeCursor(); - } - - $sql = 'UPDATE cache - SET query = :query, query_time = NOW(), duration = :duration, total = :total - WHERE session_id = :ses_id'; - - $params = array( - 'query' => $query, - ':ses_id' => $session->get_ses_id(), - ':duration' => $total_time, - ':total' => $nbanswers, - ); - - $stmt = $appbox->get_connection()->prepare($sql); - $stmt->execute($params); - $stmt->closeCursor(); - - \User_Adapter::saveQuery($query); - - return $this; - } - - /** - * {@inheritdoc} - */ - public function autocomplete($query) - { - return new ArrayCollection(); - } - - /** - * {@inheritdoc} - */ - public function excerpt($query, $fields, \record_adapter $record) - { - $ret = array(); - - $appbox = \appbox::get_instance(\bootstrap::getCore()); - $session = $appbox->get_session(); - $res = phrasea_fetch_results( - $session->get_ses_id(), ($record->get_number() + 1), 1, true, "[[em]]", "[[/em]]" - ); - - if ( ! isset($res['results']) || ! is_array($res['results'])) { - return array(); - } - - $rs = $res['results']; - $res = array_shift($rs); - if ( ! isset($res['xml'])) { - return array(); - } - - $sxe = simplexml_load_string($res['xml']); - - foreach ($fields as $name => $field) { - if ($sxe && $sxe->description && $sxe->description->$name) { - $val = array(); - foreach ($sxe->description->$name as $value) { - $val[] = str_replace(array('[[em]]', '[[/em]]'), array('', ''), (string) $value); - } - $separator = $field['separator'] ? $field['separator'][0] : ''; - $val = implode(' ' . $separator . ' ', $val); - } else { - $val = $field['value']; - } - - $ret[] = $val; - } - - return $ret; - } - - /** - * {@inheritdoc} - */ - public function resetCache() - { - $this->resetCacheNextQuery = true; - $this->queries = $this->arrayq = $this->colls = $this->qp = $this->needthesaurus = array(); - - return $this; - } - - private function addQuery($query) - { - foreach ($this->options->databoxes() as $databox) { - $this->queries[$databox->get_sbas_id()] = $query; - } - - $status = $this->options->getStatus(); - - foreach ($this->queries as $sbas => $qs) { - if ($status) { - $requestStat = 'xxxx'; - - for ($i = 4; ($i <= 64); $i ++ ) { - if ( ! isset($status[$i])) { - $requestStat = 'x' . $requestStat; - continue; - } - $val = 'x'; - if (isset($status[$i][$sbas])) { - if ($status[$i][$sbas] == '0') { - $val = '0'; - } elseif ($status[$i][$sbas] == '1') { - $val = '1'; - } - } - $requestStat = $val . $requestStat; - } - - $requestStat = ltrim($requestStat, 'x'); - - if ($requestStat !== '') { - $this->queries[$sbas] .= ' AND (recordstatus=' . $requestStat . ')'; - } - } - if ($this->options->fields()) { - $this->queries[$sbas] .= ' IN (' . implode(' OR ', $this->options->fields()) . ')'; - } - if (($this->options->getMinDate() || $this->options->getMaxDate()) && $this->options->getDateFields()) { - if ($this->options->getMinDate()) { - $this->queries[$sbas] .= ' AND ( ' . implode(' >= ' . $this->options->getMinDate()->format('Y-m-d') . ' OR ', $this->options->getDateFields()) . ' >= ' . $this->options->getMinDate()->format('Y-m-d') . ' ) '; - } - if ($this->options->getMaxDate()) { - $this->queries[$sbas] .= ' AND ( ' . implode(' <= ' . $this->options->getMaxDate()->format('Y-m-d') . ' OR ', $this->options->getDateFields()) . ' <= ' . $this->options->getMaxDate()->format('Y-m-d') . ' ) '; - } - } - } - - $this->singleParse('main', $query); - - foreach ($this->queries as $sbas => $db_query) { - $this->singleParse($sbas, $query); - } - - $base_ids = array_map(function(\collection $collection) { - return $collection->get_base_id(); - }, $this->options->collections()); - - foreach ($this->options->databoxes() as $databox) { - $sbas_id = $databox->get_sbas_id(); - - $this->colls[$sbas_id] = array(); - - foreach ($databox->get_collections() as $collection) { - if (in_array($collection->get_base_id(), $base_ids)) { - $this->colls[$sbas_id][] = $collection->get_base_id(); - } - } - - if (sizeof($this->colls[$sbas_id]) <= 0) { - continue; - } - - if ($this->needthesaurus[$sbas_id]) { - if ($databox->get_dom_thesaurus()) { - $this->qp[$sbas_id]->thesaurus2($this->indep_treeq[$sbas_id], $sbas_id, $databox->get_dbname(), $databox->get_dom_thesaurus(), true); - $this->qp['main']->thesaurus2($this->indep_treeq['main'], $sbas_id, $databox->get_dbname(), $databox->get_dom_thesaurus(), true); - } - } - - $emptyw = false; - - $this->qp[$sbas_id]->set_default($this->indep_treeq[$sbas_id], $emptyw); - $this->qp[$sbas_id]->distrib_in($this->indep_treeq[$sbas_id]); - $this->qp[$sbas_id]->factor_or($this->indep_treeq[$sbas_id]); - $this->qp[$sbas_id]->setNumValue($this->indep_treeq[$sbas_id], $databox->get_sxml_structure()); - $this->qp[$sbas_id]->thesaurus2_apply($this->indep_treeq[$sbas_id], $sbas_id); - $this->arrayq[$sbas_id] = $this->qp[$sbas_id]->makequery($this->indep_treeq[$sbas_id]); - } - - return $this; - } - - private function singleParse($sbas, $query) - { - $this->qp[$sbas] = new PhraseaEngineQueryParser($this->options->getLocale()); - $this->qp[$sbas]->debug = false; - - if ($sbas == 'main') { - $simple_treeq = $this->qp[$sbas]->parsequery($query); - } else { - $simple_treeq = $this->qp[$sbas]->parsequery($this->queries[$sbas]); - } - - $this->qp[$sbas]->priority_opk($simple_treeq); - $this->qp[$sbas]->distrib_opk($simple_treeq); - $this->needthesaurus[$sbas] = false; - - $this->indep_treeq[$sbas] = $this->qp[$sbas]->extendThesaurusOnTerms($simple_treeq, true, true, false); - $this->needthesaurus[$sbas] = $this->qp[$sbas]->containsColonOperator($this->indep_treeq[$sbas]); - - return $this; - } -} - diff --git a/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngineQueryParser.php b/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngineQueryParser.php deleted file mode 100644 index 49b96bc8ab..0000000000 --- a/lib/Alchemy/Phrasea/SearchEngine/PhraseaEngineQueryParser.php +++ /dev/null @@ -1,1954 +0,0 @@ - array("NODETYPE" => PHRASEA_OP_AND, "CANNUM" => false), - "and" => array("NODETYPE" => PHRASEA_OP_AND, "CANNUM" => false), - "ou" => array("NODETYPE" => PHRASEA_OP_OR, "CANNUM" => false), - "or" => array("NODETYPE" => PHRASEA_OP_OR, "CANNUM" => false), - "sauf" => array("NODETYPE" => PHRASEA_OP_EXCEPT, "CANNUM" => false), - "except" => array("NODETYPE" => PHRASEA_OP_EXCEPT, "CANNUM" => false), - "pres" => array("NODETYPE" => PHRASEA_OP_NEAR, "CANNUM" => true), - "near" => array("NODETYPE" => PHRASEA_OP_NEAR, "CANNUM" => true), - "avant" => array("NODETYPE" => PHRASEA_OP_BEFORE, "CANNUM" => true), - "before" => array("NODETYPE" => PHRASEA_OP_BEFORE, "CANNUM" => true), - "apres" => array("NODETYPE" => PHRASEA_OP_AFTER, "CANNUM" => true), - "after" => array("NODETYPE" => PHRASEA_OP_AFTER, "CANNUM" => true), - "dans" => array("NODETYPE" => PHRASEA_OP_IN, "CANNUM" => false), - "in" => array("NODETYPE" => PHRASEA_OP_IN, "CANNUM" => false) - ); - public $opk = array( - "<" => array("NODETYPE" => PHRASEA_OP_LT, "CANNUM" => false), - ">" => array("NODETYPE" => PHRASEA_OP_GT, "CANNUM" => false), - "<=" => array("NODETYPE" => PHRASEA_OP_LEQT, "CANNUM" => false), - ">=" => array("NODETYPE" => PHRASEA_OP_GEQT, "CANNUM" => false), - "<>" => array("NODETYPE" => PHRASEA_OP_NOTEQU, "CANNUM" => false), - "=" => array("NODETYPE" => PHRASEA_OP_EQUAL, "CANNUM" => false), - ":" => array("NODETYPE" => PHRASEA_OP_COLON, "CANNUM" => false) - ); - public $spw = array( - "all" => array( - "CLASS" => "PHRASEA_KW_ALL", "NODETYPE" => PHRASEA_KW_ALL, "CANNUM" => false - ), - "last" => array( - "CLASS" => "PHRASEA_KW_LAST", "NODETYPE" => PHRASEA_KW_LAST, "CANNUM" => true - ), - // "first" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), - // "premiers" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), - "tout" => array( - "CLASS" => "PHRASEA_KW_ALL", "NODETYPE" => PHRASEA_KW_ALL, "CANNUM" => false - ), - "derniers" => array( - "CLASS" => "PHRASEA_KW_LAST", "NODETYPE" => PHRASEA_KW_LAST, "CANNUM" => true - ) - ); - public $quoted_defaultop = array( - "VALUE" => "default_avant", "NODETYPE" => PHRASEA_OP_BEFORE, "PNUM" => 0 - ); - public $defaultop = array( - "VALUE" => "and", "NODETYPE" => PHRASEA_OP_AND, "PNUM" => NULL - ); - public $defaultlast = 12; - public $phq; - public $errmsg = ""; - - /** - * - * @var boolean - */ - public $debug = false; - - /** - * un tableau qui contiendra des propositions de thesaurus - * pour les termes de l'arbre simple - * - * @var array - */ - public $proposals = Array("QRY" => "", "BASES" => array(), "QUERIES" => array()); - - /** - * Current language for thesaurus - * @var - */ - public $lng = null; - protected $unicode; - - public function __construct($lng = "???") - { - $this->lng = $lng; - $this->unicode = new \unicode(); - - return $this; - } - - public function mb_trim($s, $encoding) - { - return(trim($s)); - } - - public function mb_ltrim($s, $encoding) - { - return(ltrim($s)); - } - - public function parsequery($phq) - { - if ($this->debug) { - for ($i = 0; $i < mb_strlen($phq, 'UTF-8'); $i ++ ) { - $c = mb_substr($phq, $i, 1, 'UTF-8'); - printf("// %s : '%s' (%d octets)\n", $i, $c, strlen($c)); - } - } - - $this->proposals = Array("QRY" => "", "BASES" => array(), "QUERIES" => array()); - $this->phq = $this->mb_trim($phq, 'UTF-8'); - if ($this->phq != "") { - return($this->maketree(0)); - } else { - - if ($this->errmsg != "") { - $this->errmsg .= sprintf("\\n"); - } - - $this->errmsg .= _('qparser::la question est vide'); - - return(null); - } - } - - public function astext($tree) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) { - return(implode(" ", $tree["VALUE"])); - } else { - - return($tree["VALUE"]); - } - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) { - return("\"" . implode(" ", $tree["VALUE"]) . "\""); - } else { - return("\"" . $tree["VALUE"] . "\""); - } - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext($tree["RB"]) . ")"); - } else { - return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . " " . $this->astext($tree["RB"]) . ")"); - } - break; - } - } - - public function astable(&$tree) - { - $this->calc_complexity($tree); - $txt = ""; - $this->astable2($txt, $tree); - $txt = "\n\n" . $txt . "\n
\n"; - - return($txt); - } - - public function calc_complexity(&$tree) - { - if ($tree) { - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - return($tree["COMPLEXITY"] = $this->calc_complexity($tree["LB"]) + $this->calc_complexity($tree["RB"])); - } else { - return($tree["COMPLEXITY"] = 1); - } - } - } - - public function astable2(&$out, &$tree, $depth = 0) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $txt = implode(" ", $tree["VALUE"]); - else - $txt = $tree["VALUE"]; - $out .= "\t" . $txt . "\n"; - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) - $txt = implode(" ", $tree["VALUE"]); - else - $txt = $tree["VALUE"]; - $out .= "\t"" . $txt . ""\n"; - break; - case "PHRASEA_KW_ALL": - $out .= "\t" . $tree["VALUE"][0] . "\n"; - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - $out .= "\t" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]" . "\n"; - else - $out .= "\t" . $tree["VALUE"][0] . "\n"; - break; - case "OPS": - case "OPK": - $op = $tree["VALUE"]; - if (isset($tree["PNUM"])) - $op .= "[" . $tree["PNUM"] . "]"; - $out .= "\t$op\n"; - $this->astable2($out, $tree["LB"], $depth + 1); - $this->astable2($out, $tree["RB"], $depth + 1); - $out .= "\n\n"; - break; - } - } - - public function dumpDiv(&$tree) - { - print("
\n"); - $this->dumpDiv2($tree); - print("
\n"); - } - - public function dumpDiv2(&$tree, $depth = 0) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(str_repeat("\t", $depth) . "" . $s . "\n"); - case "QSIMPLE": - $s = ""; - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(str_repeat("\t", $depth) . """ . $s . ""\n"); - break; - case "PHRASEA_KW_ALL": - printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - printf(str_repeat("\t", $depth) . "%s %s\n", $tree["VALUE"][0], $tree["PNUM"]); - else - printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); - break; - // case PHRASEA_KW_FIRST: - // if($tree["PNUM"]!==null) - // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); - // else - // printf("%s", $tree["VALUE"]); - // break; - case "OPS": - case "OPK": - print(str_repeat("\t", $depth) . "
\n"); - $this->dumpDiv2($tree["LB"], $depth + 1); - print(str_repeat("\t", $depth) . "
\n"); - print(str_repeat("\t", $depth) . "
\n"); - if (isset($tree["PNUM"])) - printf(str_repeat("\t", $depth + 1) . " %s[%s]\n", $tree["VALUE"], $tree["PNUM"]); - else - printf(str_repeat("\t", $depth + 1) . " %s\n", $tree["VALUE"]); - print(str_repeat("\t", $depth) . "
\n"); - print(str_repeat("\t", $depth) . "
\n"); - $this->dumpDiv2($tree["RB"], $depth + 1); - print(str_repeat("\t", $depth) . "
\n"); - - break; - } - } - - public function dump($tree) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $s = implode("
, ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print("" . $s . ""); - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(""" . $s . """); - break; - case "PHRASEA_KW_ALL": - printf("%s", $tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - printf("%s %s", $tree["VALUE"][0], $tree["PNUM"]); - else - printf("%s", $tree["VALUE"][0]); - break; - // case PHRASEA_KW_FIRST: - // if($tree["PNUM"]!==null) - // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); - // else - // printf("%s", $tree["VALUE"]); - // break; - case "OPS": - case "OPK": - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print("
"); - if (isset($tree["PNUM"])) - printf(" %s[%s] ", $tree["VALUE"], $tree["PNUM"]); - else - printf(" %s ", $tree["VALUE"]); - print("
"); - print($this->dump($tree["LB"])); - print(""); - print($this->dump($tree["RB"])); - print("
"); - break; - } - } - - public function priority_opk(&$tree, $depth = 0) - { - if ( ! $tree) { - return; - } - - if ($tree["CLASS"] == "OPK" && ($tree["LB"]["CLASS"] == "OPS" || $tree["LB"]["CLASS"] == "OPK")) { - // on a un truc du genre ((a ou b) < 5), on le transforme en (a ou (b < 5)) - $t = $tree["LB"]; - $tree["LB"] = $t["RB"]; - $t["RB"] = $tree; - $tree = $t; - } - if (isset($tree["LB"])) { - $this->priority_opk($tree["LB"], $depth + 1); - }if (isset($tree["RB"])) { - $this->priority_opk($tree["RB"], $depth + 1); - } - } - - public function distrib_opk(&$tree, $depth = 0) - { - if ( ! $tree) { - return; - } - - if ($tree["CLASS"] == "OPK" && ($tree["RB"]["CLASS"] == "OPS")) { - // on a un truc du genre (a = (5 ou 6)), on le transforme en ((a = 5) ou (a = 6)) - $tmp = array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"], - "PNUM" => $tree["PNUM"], - "LB" => $tree["LB"], - "RB" => $tree["RB"]["RB"], - "DEPTH" => $tree["LB"]["DEPTH"]); - $t = $tree["RB"]; - $tree["RB"] = $t["LB"]; - $t["LB"] = $tree; - $t["RB"] = $tmp; - $tree = $t; - } - if (isset($tree["LB"])) - $this->distrib_opk($tree["LB"], $depth + 1); - if (isset($tree["RB"])) - $this->distrib_opk($tree["RB"], $depth + 1); - } - - public function thesaurus2_apply(&$tree, $bid) - { - if ( ! $tree) { - return; - } - - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE") && isset($tree["SREF"]) && isset($tree["SREF"]["TIDS"])) { - $tids = array(); - foreach ($tree["SREF"]["TIDS"] as $tid) { - if ($tid["bid"] == $bid) - $tids[] = $tid["pid"]; - } - if (count($tids) >= 1) { - /* - if (count($tids)==1) { - // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finit par '%') - $val = str_replace(".", "d", $tids[0]) . "d%"; - $tree["VALUE"] = array($val); - } else { - // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle méme la syntaxe car la value finit par '$' - $val = ""; - foreach($tids as $tid) - $val .= ($val?"|":"") . "(" . str_replace(".", "d", $tid) . "d.*)"; - $tree["VALUE"] = array("^" . $val); - } - */ - $tree["VALUE"] = array(); - foreach ($tids as $tid) - $tree["VALUE"][] = str_replace(".", "d", $tid) . "d%";; - } else { - // le mot n'est pas dans le thesaurus - } - /* - */ - } - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $this->thesaurus2_apply($tree["LB"], $bid); - $this->thesaurus2_apply($tree["RB"], $bid); - } - } - - // étend (ou remplace) la recherche sur les termes simples en recherche sur thesaurus - // ex: (a et b) - // full-text only : ==> (a et b) - // thesaurus only : ==> ((th:a) et (th:b)) - // ft et thesaurus : ==> ((a ou (th:a)) et (b ou (th:b))) - // RETOURNE l'arbre résultat sans modifier l'arbre d'origine - public function extendThesaurusOnTerms(&$tree, $useFullText, $useThesaurus, $keepfuzzy) - { - $copy = $tree; - $this->_extendThesaurusOnTerms($tree, $copy, $useFullText, $useThesaurus, $keepfuzzy, 0, ""); - - $this->proposals["QRY"] = "" . $this->_queryAsHTML($tree) . ""; - - return($copy); - } - - public function _extendThesaurusOnTerms(&$tree, &$copy, $useFullText, $useThesaurus, $keepfuzzy, $depth, $path) - { - if ($depth == 0) - $ret = $tree; - if ( ! $useThesaurus) { - return; // full-text only : inchangé - } - - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE")) { - if (isset($tree["CONTEXT"])) - $copy = $this->_extendToThesaurus_Simple($tree, false, $keepfuzzy, $path); - else - $copy = $this->_extendToThesaurus_Simple($tree, $useFullText, $keepfuzzy, $path); - } else { - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON) { - // on a 'field:value' , on traite 'value' - $tree["RB"]["PATH"] = $copy["RB"]["PATH"] = $path . "R"; - if (isset($tree["RB"]["CONTEXT"])) - $copy["CONTEXT"] = $tree["CONTEXT"] = $tree["RB"]["CONTEXT"]; - else - if ( ! $keepfuzzy) - $copy["CONTEXT"] = $tree["CONTEXT"] = "*"; - - $copy["RB"]["SREF"] = &$tree["RB"]; - } else { - $recursL = $recursR = false; - if ($tree["CLASS"] == "OPS" && ($tree["NODETYPE"] == PHRASEA_OP_AND || $tree["NODETYPE"] == PHRASEA_OP_OR || $tree["NODETYPE"] == PHRASEA_OP_EXCEPT)) { - // on a une branche à gauche de 'ET', 'OU', 'SAUF' - $recursL = true; - } - if ($tree["CLASS"] == "OPS" && ($tree["NODETYPE"] == PHRASEA_OP_AND || $tree["NODETYPE"] == PHRASEA_OP_OR || $tree["NODETYPE"] == PHRASEA_OP_EXCEPT)) { - // on a une branche à droite de 'ET', 'OU', 'SAUF' - $recursR = true; - } - if ($recursL) - $this->_extendThesaurusOnTerms($tree["LB"], $copy["LB"], $useFullText, $useThesaurus, $keepfuzzy, $depth + 1, $path . "L"); - if ($recursR) - $this->_extendThesaurusOnTerms($tree["RB"], $copy["RB"], $useFullText, $useThesaurus, $keepfuzzy, $depth + 1, $path . "R"); - } - } - } - - // étend (ou remplace) un terme cherché en 'full-text' à une recherche thesaurus (champ non spécifié, tout le thésaurus = '*') - // le contexte éventuel est rapporté à l'opérateur ':' - // ex : a[k] ==> (a ou (TH :[k] a)) - public function _extendToThesaurus_Simple(&$simple, $keepFullText, $keepfuzzy, $path) - { - $simple["PATH"] = $path; - $context = null; - if (isset($simple["CONTEXT"])) { - $context = $simple["CONTEXT"]; - // unset($simple["CONTEXT"]); - } - if ($keepFullText) { - // on fait un OU entre la recherche ft et une recherche th - $tmp = array("CLASS" => "OPS", - "NODETYPE" => PHRASEA_OP_OR, - "VALUE" => "OR", - "PNUM" => null, - "DEPTH" => $simple["DEPTH"], - "LB" => $simple, - "RB" => array("CLASS" => "OPK", - "NODETYPE" => PHRASEA_OP_COLON, - "VALUE" => ":", - // "CONTEXT"=>$context, - "PNUM" => null, - "DEPTH" => $simple["DEPTH"] + 1, - "LB" => array("CLASS" => "SIMPLE", - "NODETYPE" => PHRASEA_KEYLIST, - "VALUE" => array("*"), - "DEPTH" => $simple["DEPTH"] + 2 - ), - "RB" => $simple - ) - ); - // on vire le contexte du coté fulltext - unset($tmp["LB"]["CONTEXT"]); - // ajoute le contexte si nécéssaire - if ($context !== null) - $tmp["RB"]["CONTEXT"] = $context; - else - if ( ! $keepfuzzy) - $tmp["RB"]["CONTEXT"] = "*"; - // corrige les profondeurs des 2 copies du 'simple' d'origine - $tmp["LB"]["DEPTH"] += 1; - $tmp["RB"]["RB"]["DEPTH"] += 2; - // note une référence vers le terme d'origine - $tmp["RB"]["RB"]["SREF"] = &$simple; - $tmp["RB"]["RB"]["PATH"] = $path; - } else { - // on remplace le ft par du th - $tmp = array("CLASS" => "OPK", - "NODETYPE" => PHRASEA_OP_COLON, - "VALUE" => ":", - // "CONTEXT"=>$context, - "PNUM" => null, - "DEPTH" => $simple["DEPTH"] + 1, - "LB" => array("CLASS" => "SIMPLE", - "NODETYPE" => PHRASEA_KEYLIST, - "VALUE" => array("*"), - "DEPTH" => $simple["DEPTH"] + 1 - ), - "RB" => $simple - ); - // ajoute le contexte si nécéssaire - if ($context !== null) - $tmp["CONTEXT"] = $context; - else - if ( ! $keepfuzzy) - $tmp["CONTEXT"] = "*"; - // corrige la profondeur de la copie du 'simple' d'origine - $tmp["RB"]["DEPTH"] += 1; - // note une référence vers le terme d'origine - $tmp["RB"]["SREF"] = &$simple; - $tmp["RB"]["PATH"] = $path; - } - - return($tmp); - } - - public function thesaurus2(&$tree, $bid, $name, &$domthe, $searchsynonyms = true, $depth = 0) - { - if ($this->debug) - print("thesaurus2:\n\$tree=" . var_export($tree, true) . "\n"); - - if ($depth == 0) - $this->proposals["BASES"]["b$bid"] = array("BID" => $bid, "NAME" => $name, "TERMS" => array()); - - if ( ! $tree) { - return(0); - } - - $ambigus = 0; - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON) { -// $ambigus = $this->setTids($tree, $tree["RB"], $bid, $domthe, $searchsynonyms); - $ambigus = $this->setTids($tree, $bid, $domthe, $searchsynonyms); - } elseif ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $ambigus += $this->thesaurus2($tree["LB"], $bid, $name, $domthe, $searchsynonyms, $depth + 1); - $ambigus += $this->thesaurus2($tree["RB"], $bid, $name, $domthe, $searchsynonyms, $depth + 1); - } - - return($ambigus); - } - - public function propAsHTML(&$node, &$html, $path, $depth = 0) - { - global $parm; - if ($depth > 0) { - $tsy = array(); - $lngfound = "?"; - for ($n = $node->firstChild; $n; $n = $n->nextSibling) { - if ($n->nodeName == "sy") { - $lng = $n->getAttribute("lng"); - if ( ! array_key_exists($lng, $tsy)) - $tsy[$lng] = array(); - $zsy = array("v" => $n->getAttribute("v"), "w" => $n->getAttribute("w"), "k" => $n->getAttribute("k")); - - if ($lngfound == "?" || ($lng == $this->lng && $lngfound != $lng)) { - $lngfound = $lng; - $syfound = $zsy; - } else { - - } - $tsy[$lng][] = $zsy; - } - } - $alt = ""; - foreach ($tsy as $lng => $tsy2) { - foreach ($tsy2 as $sy) { - $alt .= $alt ? "\n" : ""; - $alt .= "" . $lng . ": " . p4string::MakeString($sy["v"], "js"); - } - } - - $this->proposals['QUERIES'][$syfound["w"]] = $syfound["w"]; - - $thtml = $syfound["v"]; - $kjs = $syfound["k"] ? ("'" . p4string::MakeString($syfound["k"], "js") . "'") : "null"; - $wjs = "'" . p4string::MakeString($syfound["w"], "js") . "'"; - - if ($node->getAttribute("term")) { - $thtml = "" . $thtml . ""; - $node->removeAttribute("term"); - } - - $tab = str_repeat("\t", $depth); - $html .= $tab . "
\n"; - $html .= $tab . "\t" . $thtml . "\n"; - } - - $tsort = array(); - for ($n = $node->firstChild; $n; $n = $n->nextSibling) { - if ($n->nodeType == XML_ELEMENT_NODE && $n->getAttribute("marked")) { // only 'te' marked - $lngfound = '?'; - $syfound = '?'; - for ($n2 = $n->firstChild; $n2; $n2 = $n2->nextSibling) { - if ($n2->nodeName == 'sy') { - $lng = $n2->getAttribute('lng'); - if ($lngfound == "?" || ($lng == $this->lng && $lngfound != $lng)) { - $lngfound = $lng; - $syfound = $n2->getAttribute('w'); - } - } - } - $n->removeAttribute("marked"); - for ($i = 0; array_key_exists($syfound . $i, $tsort) && $i < 9999; $i ++ ) - ; - $tsort[$syfound . $i] = $n; - } - } - ksort($tsort); - - foreach ($tsort as $n) { - $this->propAsHTML($n, $html, $path, $depth + 1); - } - - if ($depth > 0) - $html .= $tab . "
\n"; - } - - public function _queryAsHTML($tree, $depth = 0) - { - if ($depth == 0) { - $ambiguites = array("n" => 0, "refs" => array()); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - $w = is_array($tree["VALUE"]) ? implode(' ', $tree["VALUE"]) : $tree["VALUE"]; - if (isset($tree["PATH"])) { - $path = $tree["PATH"]; - if (isset($tree["CONTEXT"])) - $w .= ' [' . $tree["CONTEXT"] . ']'; - $txt = '"' . $w . '"'; - } else { - if (isset($tree["CONTEXT"])) - $w .= '[' . $tree["CONTEXT"] . ']'; - if ($tree["CLASS"] == "QSIMPLE") - $txt = '"' . $w . '"'; - else - $txt = $w; - } - - return($txt); - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return('(' . $this->_queryAsHTML($tree["LB"], $depth + 1) . ' ' . $tree["VALUE"] . '[' . $tree["PNUM"] . '] ' . $this->_queryAsHTML($tree["RB"], $depth + 1) . ')'); - } else { - return('(' . $this->_queryAsHTML($tree["LB"], $depth + 1) . ' ' . $tree["VALUE"] . ' ' . $this->_queryAsHTML($tree["RB"], $depth + 1) . ')'); - } - break; - } - } - - public function setTids(&$tree, $bid, &$domthe, $searchsynonyms) - { - if ($this->debug) - print("============================ setTids:\n\$tree=" . var_export($tree, true) . "\n"); - - // $this->proposals["BASES"]["b$bid"] = array("BID"=>$bid, "TERMS"=>array()); - - $ambigus = 0; - if (is_array($w = $tree["RB"]["VALUE"])) - $t = $w = implode(" ", $w); - - if (isset($tree["CONTEXT"])) { - if ( ! $tree["CONTEXT"]) { - $x0 = "@w=\"" . $w . "\" and not(@k)"; - } else { - if ($tree["CONTEXT"] == "*") { - $x0 = "@w=\"" . $w . "\""; - } else { - $x0 = "@w=\"" . $w . "\" and @k=\"" . $tree["CONTEXT"] . "\""; - $t .= " (" . $tree["CONTEXT"] . ")"; - } - } - } else { - $x0 = "@w=\"" . $w . "\""; - } - - $x = "/thesaurus//sy[" . $x0 . "]"; - - if ($this->debug) - printf("searching thesaurus with xpath='%s'
\n", $x); - - $dxp = new DOMXPath($domthe); - $nodes = $dxp->query($x); - - if ( ! isset($tree["RB"]["SREF"]["TIDS"])) - $tree["RB"]["SREF"]["TIDS"] = array(); - if ($nodes->length >= 1) { - if ($nodes->length == 1) { - // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finira par '%') - $this->addtoTIDS($tree["RB"], $bid, $nodes->item(0)); - // $this->thesaurusDOMNodes[] = $nodes->item(0); - } else { - // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle meme la syntaxe car la value finira par '$') - $val = ""; - foreach ($nodes as $node) { - if ( ! isset($tree["CONTEXT"])) - $ambigus ++; - $this->addtoTIDS($tree["RB"], $bid, $node); - } - } - $path = $tree["RB"]["SREF"]["PATH"]; - $prophtml = ""; - $this->propAsHTML($domthe->documentElement, $prophtml, $path); - $this->proposals["BASES"]["b$bid"]["TERMS"][$path]["HTML"] = $prophtml; - } else { - // le mot n'est pas dans le thesaurus - } - - return($ambigus); - } - /* - function dead_setTids(&$tree, &$simple, $bid, &$domthe, $searchsynonyms) - { - // if($this->debug) - print("setTids:\n\$tree=" . var_export($tree, true) . "\n"); - - $ambigus = 0; - if(is_array($w = $simple["VALUE"])) - $t = $w = implode(" ", $w); - - if (isset($tree["CONTEXT"])) { - if (!$tree["CONTEXT"]) { - $x0 = "@w=\"" . $w ."\" and not(@k)"; - } else { - if ($tree["CONTEXT"]=="*") { - $x0 = "@w=\"" . $w ."\""; - } else { - $x0 = "@w=\"" . $w ."\" and @k=\"" . $tree["CONTEXT"] . "\""; - $t .= " (" . $tree["CONTEXT"] . ")"; - } - } - } else { - $x0 = "@w=\"" . $w ."\""; - } - - $x = "/thesaurus//sy[" . $x0 ."]"; - - if($this->debug) - printf("searching thesaurus with xpath='%s'
\n", $x); - - $dxp = new DOMXPath($domthe); - $nodes = $dxp->query($x); - - if(!isset($tree["RB"]["SREF"]["TIDS"])) - $tree["RB"]["SREF"]["TIDS"] = array(); - if ($nodes->length >= 1) { - if ($nodes->length == 1) { - // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finira par '%') - $this->addtoTIDS($tree["RB"], $bid, $nodes->item(0)); - // $this->thesaurusDOMNodes[] = $nodes->item(0); - } else { - // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle meme la syntaxe car la value finira par '$') - $val = ""; - foreach ($nodes as $node) { - if(!isset($tree["CONTEXT"])) - $ambigus++; - $this->addtoTIDS($tree["RB"], $bid, $node); - } - } - $path = $tree["RB"]["SREF"]["PATH"]; - $prophtml = ""; - $this->propAsHTML($domthe->documentElement, $prophtml, $path); - $this->proposals["TERMS"][$path]["HTML"] = $prophtml; - } else { - // le mot n'est pas dans le thesaurus - } - - return($ambigus); - } - */ - - public function containsColonOperator(&$tree) - { - if ( ! $tree) { - return(false); - } - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE")) { - return(true); - } - $ret = false; - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $ret |= $this->containsColonOperator($tree["LB"]); - $ret |= $this->containsColonOperator($tree["RB"]); - } - - return($ret); - } - - public function addtoTIDS(&$extendednode, $bid, $DOMnode) // ajoute un tid en évitant les doublons - { - $id = $DOMnode->getAttribute("id"); - $pid = $DOMnode->parentNode->getAttribute("id"); - $lng = $DOMnode->getAttribute("lng"); - $w = $DOMnode->getAttribute("w"); - $k = $DOMnode->getAttribute("k"); - $p = $DOMnode->parentNode->getAttribute("v"); // le terme général (pére) du terme recherché : utile pour la levée d'ambiguité - - $path = $extendednode["SREF"]["PATH"]; - if ($this->debug) - printf("found node id='%s', v='%s' w='%s', k='%s', p='%s' for node-path=%s \n", $id, $DOMnode->getAttribute("v"), $w, $k, $p, $path); - - if ( ! $k) - $k = null; - - $found = false; - foreach ($extendednode["SREF"]["TIDS"] as $ztid) { - if ($ztid["bid"] != $bid) - continue; - if ($ztid["pid"] == $pid) { - $found = true; - } else { -// if($ztid["w"]==$w && $ztid["k"]==$k && $ztid["lng"]==$lng) -// { -// // FATAL : il y a un doublon réel dans le thesaurus de cette base (méme terme, méme contexte) -// // printf("FATAL doublon on base %d (%s[%s])\n", $bid, $w, $k); -// $found = true; -// break; -// } - } - } - if ( ! $found) - $extendednode["SREF"]["TIDS"][] = array("bid" => $bid, "pid" => $pid, "id" => $id, "w" => $w, "k" => $k, "lng" => $lng, "p" => $p); - - // on liste les propositions de thésaurus pour ce node (dans l'arbre simple) - if ( ! isset($this->proposals["BASES"]["b$bid"]["TERMS"][$path])) { - // $this->proposals["TERMS"][$path] = array("TERM"=>implode(" ", $extendednode["VALUE"]), "PROPOSALS"=>array()); - $term = implode(" ", $extendednode["VALUE"]); - if (isset($extendednode["CONTEXT"]) && $extendednode["CONTEXT"]) { - $term .= " (" . $extendednode["CONTEXT"] . ")"; - } - $this->proposals["BASES"]["b$bid"]["TERMS"][$path] = array("TERM" => $term); // , "PROPOSALS"=>array() ); //, "PROPOSALS_TREE"=>new DOMDocument("1.0", "UTF-8")); - } -// printf("<%s id='%s'>
\n", $DOMnode->tagName, $DOMnode->getAttribute("id")); -// printf("found node <%s id='%s' w='%s' k='%s'>
\n", $DOMnode->nodeName, $DOMnode->getAttribute('id'), $DOMnode->getAttribute('w'), $DOMnode->getAttribute('k')); - // on marque le terme principal - $DOMnode->parentNode->setAttribute("term", "1"); - // on commence par marquer les fils directs. rappel:$DOMnode pointe sur un sy - for ($node = $DOMnode->parentNode->firstChild; $node; $node = $node->nextSibling) { - if ($node->nodeName == "te") { - $node->setAttribute("marked", "1"); - } - } - // puis par remonter au père - for ($node = $DOMnode->parentNode; $node && $node->nodeType == XML_ELEMENT_NODE && $node->parentNode; $node = $node->parentNode) { - $id = $node->getAttribute("id"); - if ( ! $id) - break; // on a dépassé la racine du thésaurus - $node->setAttribute("marked", "1"); - } - } - - public function astext_ambigu($tree, &$ambiguites, $mouseCallback = "void", $depth = 0) - { - if ($depth == 0) { - $ambiguites = array("n" => 0, "refs" => array()); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - $prelink = $postlink = ""; - $w = is_array($tree["VALUE"]) ? implode(" ", $tree["VALUE"]) : $tree["VALUE"]; - $tab = "\n" . str_repeat("\t", $depth); - if (isset($tree["TIDS"]) && count($tree["TIDS"]) > 1) { - $ambiguites["refs"][$n = $ambiguites["n"]] = &$tree; - $txt = $tab . ""; - $txt .= $tab . "\t\"" . $w . ""; - $txt .= $tab . "\t\""; - $txt .= $tab . "\n"; - $ambiguites["n"] ++; - } else { - if (isset($tree["CONTEXT"])) - $w .= "[" . $tree["CONTEXT"] . "]"; - if ($tree["CLASS"] == "QSIMPLE") - $txt = $tab . "\"" . $w . "\"\n"; - else - $txt = $tab . "" . $w . "\n"; - } - - return($txt); - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth + 1) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth + 1) . ")"); - } else { - return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth + 1) . " " . $tree["VALUE"] . " " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth + 1) . ")"); - } - break; - } - } - - public function get_ambigu(&$tree, $mouseCallback = "void", $depth = 0) - { - if ( ! $tree) { - return(""); - } - - unset($tree["DEPTH"]); - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $this->get_ambigu($tree["LB"], $mouseCallback, $depth + 1); - $this->get_ambigu($tree["RB"], $mouseCallback, $depth + 1); - } else { - - } - if ($depth == 0) { - $t_ambiguites = array(); - $r = ($this->astext_ambigu($tree, $t_ambiguites, $mouseCallback)); - $t_ambiguites["query"] = $r; - - return($t_ambiguites); - } - } - - public function set_default(&$tree, &$emptyw, $depth = 0) - { - if ( ! $tree) { - return(true); - } - - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["CLASS"] == "OPS") { - if ( ! $this->set_default($tree["LB"], $emptyw, $depth + 1)) { - return(false); - } - if ( ! $this->set_default($tree["RB"], $emptyw, $depth + 1)) { - return(false); - } - } else { // OPK ! - // jy 20041223 : ne pas appliquer d'op. par def. derriere un op arith. - // ex : "d < 1/2/2003" : grouper la liste "1","2","2004" en "mot" unique - if ( ! $tree["LB"] || ($tree["LB"]["CLASS"] != "SIMPLE" && $tree["LB"]["CLASS"] != "QSIMPLE") || (is_array($tree["LB"]["VALUE"]) && count($tree["LB"]["VALUE"]) != 1)) { - // un op. arith. doit étre précédé d'un seul nom de champ - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, un nom de champs est attendu avant l operateur %s'), $tree["VALUE"]); - - return(false); - } - if ( ! $tree["RB"] || ($tree["RB"]["CLASS"] != "SIMPLE" && $tree["RB"]["CLASS"] != "QSIMPLE")) { - // un op. arith. doit étre suivi d'une valeur - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendue apres l operateur %s'), $tree["VALUE"]); - - return(false); - } - if (is_array($tree["RB"]["VALUE"])) { - $lw = ""; - foreach ($tree["RB"]["VALUE"] as $w) - $lw .= ( $lw == "" ? "" : " ") . $w; - $tree["RB"]["VALUE"] = $lw; - } - } - - /** gestion des branches null - * a revoir car ca ppete pas d'erreur mais corrige automatiquement - * ** */ - if ( ! isset($tree["RB"])) - $tree = $tree["LB"]; - else - if ( ! isset($tree["LB"])) - $tree = $tree["RB"]; - } else { - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE")) { - if (is_array($tree["VALUE"])) { - $treetmp = null; - $pnum = 0; - for ($i = 0; $i < count($tree["VALUE"]); $i ++ ) { - // gestion mot vide - if (isset($emptyw[$tree["VALUE"][$i]]) || $tree["VALUE"][$i] == "?" || $tree["VALUE"][$i] == "*") { - // on a forcé les '?' ou '*' isolés comme des mots vides - $pnum ++; - } else { - if ($treetmp == null) { - $treetmp = array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"][$i], - "PNUM" => $tree["PNUM"], - "DEPTH" => $tree["DEPTH"]); - $pnum = 0; - } else { - $dop = $tree["CLASS"] == "QSIMPLE" ? $this->quoted_defaultop : $this->defaultop; - $treetmp = array("CLASS" => "OPS", - "VALUE" => $dop["VALUE"], - "NODETYPE" => $dop["NODETYPE"], - "PNUM" => $pnum, // peut-être écrasé par defaultop - "DEPTH" => $depth, - "LB" => $treetmp, - "RB" => array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"][$i], - "PNUM" => $tree["PNUM"], - "DEPTH" => $tree["DEPTH"]) - ); - if (array_key_exists("PNUM", $dop)) - $treetmp["PNUM"] = $dop["PNUM"]; - $pnum = 0; - } - } - } - $tree = $treetmp; - } - } - } - - return(true); - } - - public function factor_or(&$tree) - { - do - $n = $this->factor_or2($tree); while ($n > 0); - } - - public function factor_or2(&$tree, $depth = 0) - { - $nmodif = 0; - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["NODETYPE"] == PHRASEA_OP_OR && ($tree["LB"]["CLASS"] == "SIMPLE" || $tree["LB"]["CLASS"] == "QSIMPLE") && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE")) { - $tree["CLASS"] = "SIMPLE"; - $tree["NODETYPE"] = PHRASEA_KEYLIST; - $tree["VALUE"] = is_array($tree["LB"]["VALUE"]) ? $tree["LB"]["VALUE"] : array($tree["LB"]["VALUE"]); - if (is_array($tree["RB"]["VALUE"])) { - foreach ($tree["RB"]["VALUE"] as $v) - $tree["VALUE"][] = $v; - } else - $tree["VALUE"][] = $tree["RB"]["VALUE"]; - unset($tree["LB"]); - unset($tree["RB"]); - unset($tree["PNUM"]); - $nmodif ++; - } else { - $nmodif += $this->factor_or2($tree["LB"], $depth + 1); - $nmodif += $this->factor_or2($tree["RB"], $depth + 1); - } - } - - return($nmodif); - } - - public function setNumValue(&$tree, \SimpleXMLElement $sxml_struct, $depth = 0) - { - if ($tree["CLASS"] == "OPK") { - if (isset($tree["RB"]) && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") && ($tree["LB"]["CLASS"] == "SIMPLE" || $tree["LB"]["CLASS"] == "QSIMPLE")) { - $z = $sxml_struct->xpath('/record/description'); - if ($z && is_array($z)) { - foreach ($z[0] as $ki => $vi) { - $champ = null; - if (is_array($tree["LB"]["VALUE"])) - $champ = $tree["LB"]["VALUE"][0]; - else - $champ = $tree["LB"]["VALUE"]; - if ($champ && strtoupper($ki) == strtoupper($champ)) { - foreach ($vi->attributes() as $propname => $val) { - if (strtoupper($propname) == strtoupper("type")) { - if ($tree["NODETYPE"] == PHRASEA_OP_EQUAL) // cas particulier du "=" sur une date - $this->changeNodeEquals($tree, $val); - else - $this->setNumValue2($tree["RB"], $val); - } - } - } - } - } - } - } - if (isset($tree["LB"])) - $this->setNumValue($tree["LB"], $sxml_struct, $depth + 1); - if (isset($tree["RB"])) - $this->setNumValue($tree["RB"], $sxml_struct, $depth + 1); - } - - public function changeNodeEquals(&$branch, $type) - { - if (strtoupper($type) == strtoupper("Date")) { - $branch = $this->changeNodeEquals2($branch); - } - } - - public function changeNodeEquals2($oneBranch) - { - ## creation branche gauche avec ">=" -// print("changeNodeEquals2\n"); -// print("creation branche gauche ( '>=' ) \n"); - $newTreeLB = array("CLASS" => "OPK", - "VALUE" => ">=", - "NODETYPE" => PHRASEA_OP_GEQT, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $oneBranch["LB"], - "RB" => array("CLASS" => "SIMPLE", - "VALUE" => $this->isoDate($oneBranch["RB"]["VALUE"], false), - "NODETYPE" => PHRASEA_KEYLIST, - "PNUM" => NULL, - "DEPTH" => 0) - ); - - $newTreeRB = array("CLASS" => "OPK", - "VALUE" => "<=", - "NODETYPE" => PHRASEA_OP_LEQT, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $oneBranch["LB"], - "RB" => array("CLASS" => "SIMPLE", - "VALUE" => $this->isoDate($oneBranch["RB"]["VALUE"], true), - "NODETYPE" => PHRASEA_KEYLIST, - "PNUM" => NULL, - "DEPTH" => 0) - ); -// print("fin creation branche droite avec '<=' \n"); - ## fin creation branche droite ( "<=" ) - - $tree = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $newTreeLB, - "RB" => $newTreeRB); - - - return $tree; - } - - public function setNumValue2(&$branch, $type) - { - if (strtoupper($type) == strtoupper("Date")) { - $dateEnIso = $this->isoDate($branch["VALUE"]); - $branch["VALUE"] = $dateEnIso; - } - } - - public function isoDate($onedate, $max = false) - { - $v_y = "1900"; - $v_m = "01"; - $v_d = "01"; - - $v_h = $v_minutes = $v_s = "00"; - if ($max) { - $v_h = $v_minutes = $v_s = "99"; - } - $tmp = $onedate; - - if ( ! is_array($tmp)) - $tmp = explode(" ", $tmp); - - switch (sizeof($tmp)) { - // on a une date complete séparé avec des espaces, slash ou tiret - case 3 : - if (strlen($tmp[0]) == 4) { - $v_y = $tmp[0]; - $v_m = $tmp[1]; - $v_d = $tmp[2]; - // on a l'année en premier, on suppose alors que c'est de la forme YYYY MM DD - } elseif (strlen($tmp[2]) == 4) { - // on a l'année en dernier, on suppose alors que c'est de la forme DD MM YYYY - $v_y = $tmp[2]; - $v_m = $tmp[1]; - $v_d = $tmp[0]; - } else { - // l'année est sur un 2 chiffre et pas 4 - // ca fou la zone - - $v_d = $tmp[0]; - $v_m = $tmp[1]; - if ($tmp[2] < 20) - $v_y = "20" . $tmp[2]; - else - $v_y = "19" . $tmp[2]; - } - break; - - case 2 : - // On supposerait n'avoir que le mois et l'année - if (strlen($tmp[0]) == 4) { - $v_y = $tmp[0]; - $v_m = $tmp[1]; - // on a l'année en premier, on suppose alors que c'est de la forme YYYY MM DD - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } elseif (strlen($tmp[1]) == 4) { - // on a l'année en premier, on suppose alors que c'est de la forme DD MM YYYY - $v_y = $tmp[1]; - $v_m = $tmp[0]; - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } else { - // on a l'anné sur 2 chiffres - if ($tmp[1] < 20) - $v_y = "20" . $tmp[1]; - else - $v_y = "19" . $tmp[1]; - $v_m = $tmp[0]; - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } - break; - - - // lé ca devient la zone pour savoir si on a que l'année ou si c'est une date sans espaces,slash ou tiret - case 1 : - switch (strlen($tmp[0])) { - case 14 : - // date iso YYYYMMDDHHMMSS - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - $v_d = substr($tmp[0], 6, 2); - $v_h = substr($tmp[0], 8, 2); - $v_minutes = substr($tmp[0], 10, 2); - $v_s = substr($tmp[0], 12, 2); - break; - case 8 : - // date iso YYYYMMDD - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - $v_d = substr($tmp[0], 6, 2); - break; - case 6 : - // date iso YYYYMM - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - case 4 : - // date iso YYYY - $v_y = $tmp[0]; - - if ($max) - $v_m = "99"; - else - $v_m = "00"; - - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - case 2 : - // date iso YY - if ($tmp[0] < 20) - $v_y = "20" . $tmp[0]; - else - $v_y = "19" . $tmp[0]; - - if ($max) - $v_m = "99"; - else - $v_m = "00"; - - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - } - - - - break; - } - - return("" . $v_y . $v_m . $v_d . $v_h . $v_minutes . $v_s); - } - - public function distrib_in(&$tree, $depth = 0) - { - $opdistrib = array(PHRASEA_OP_AND, PHRASEA_OP_OR, PHRASEA_OP_EXCEPT, PHRASEA_OP_NEAR, PHRASEA_OP_BEFORE, PHRASEA_OP_AFTER); // ces opérateurs sont 'distribuables' autour d'un 'IN' - - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["NODETYPE"] == PHRASEA_OP_IN || $tree["CLASS"] == "OPK") { - if ($tree["LB"]["CLASS"] == "OPK") { - // on a un truc du genre '(t1 = t2) dans t3' - // ... on ne fait rien - } - if ($tree["LB"]["CLASS"] == "OPS" && in_array($tree["LB"]["NODETYPE"], $opdistrib)) { - // on a un truc du genre '(t1 op t2) {dans|=} t3', on distribue le dans é t1 et t2 - // ==> ((t1 dans t3) op (t2 dans t3)) - $m_v = $tree["VALUE"]; - $m_t = $tree["CLASS"]; - $m_o = $tree["NODETYPE"]; - $m_n = $tree["PNUM"]; - - $tree["CLASS"] = $tree["LB"]["CLASS"]; - $tree["NODETYPE"] = $tree["LB"]["NODETYPE"]; - $tree["VALUE"] = $tree["LB"]["VALUE"]; - $tree["PNUM"] = $tree["LB"]["PNUM"]; - - $tree["LB"]["CLASS"] = $m_t; - $tree["LB"]["NODETYPE"] = $m_o; - $tree["LB"]["VALUE"] = $m_v; - $tree["LB"]["PNUM"] = $m_n; - - $tree["RB"] = array("CLASS" => $m_t, - "NODETYPE" => $m_o, - "VALUE" => $m_v, - "PNUM" => $m_n, - "LB" => $tree["LB"]["RB"], - "RB" => $tree["RB"]); - - $tree["LB"]["RB"] = $tree["RB"]["RB"]; - // return; - } - - if ($tree["RB"]["CLASS"] == "OPS" && in_array($tree["RB"]["NODETYPE"], $opdistrib)) { - - // on a un truc du genre 't1 {dans|=} (t2 op t3)', on distribue le dans é t2 et t3 - // ==> ((t1 dans t2) ou (t1 dans t3)) - $m_v = $tree["VALUE"]; - $m_t = $tree["CLASS"]; - $m_o = $tree["NODETYPE"]; - $m_n = $tree["PNUM"]; - - $tree["CLASS"] = $tree["RB"]["CLASS"]; - $tree["NODETYPE"] = $tree["RB"]["NODETYPE"]; - $tree["VALUE"] = $tree["RB"]["VALUE"]; - $tree["PNUM"] = $tree["RB"]["PNUM"]; - - $tree["RB"]["CLASS"] = $m_t; - $tree["RB"]["NODETYPE"] = $m_o; - $tree["RB"]["VALUE"] = $m_v; - $tree["RB"]["PNUM"] = $m_n; - - $tree["LB"] = array("CLASS" => $m_t, - "NODETYPE" => $m_o, - "VALUE" => $m_v, - "PNUM" => $m_n, - "LB" => $tree["LB"], - "RB" => $tree["RB"]["LB"]); - - $tree["RB"]["LB"] = $tree["LB"]["LB"]; - } - } - $this->distrib_in($tree["LB"], $depth + 1); - $this->distrib_in($tree["RB"], $depth + 1); - } - } - - public function makequery($tree) - { - $a = array($tree["NODETYPE"]); - switch ($tree["CLASS"]) { - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== NULL) - $a[] = $tree["PNUM"]; - break; - case "PHRASEA_KW_ALL": - break; - case "SIMPLE": - case "QSIMPLE": - // pas de tid, c'est un terme normal - if (is_array($tree["VALUE"])) { - foreach ($tree["VALUE"] as $k => $v) - $a[] = $v; - } else { - $a[] = $tree["VALUE"]; - } - break; - case "OPK": - if ($tree["LB"] !== NULL) - $a[] = $this->makequery($tree["LB"]); - if ($tree["RB"] !== NULL) - $a[] = $this->makequery($tree["RB"]); - break; - case "OPS": - if ($tree["PNUM"] !== NULL) - $a[] = intval($tree["PNUM"]); - if ($tree["LB"] !== NULL) - $a[] = $this->makequery($tree["LB"]); - if ($tree["RB"] !== NULL) - $a[] = $this->makequery($tree["RB"]); - break; - } - - return($a); - } - - public function maketree($depth, $inquote = false) - { -// printf("\n\n"); - $tree = null; - while ($t = $this->nexttoken($inquote)) { - if ($this->debug) - printf("got token %s of class %s\n", $t["VALUE"], $t["CLASS"]); - switch ($t["CLASS"]) { - case "TOK_RP": - if ($inquote) { - // quand on est entre guillements les tokens perdent leur signification - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if ( ! $tree) { - return(null); - } - } else { - if ($depth <= 0) { // ')' : retour de récursivité - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= _('qparser:: erreur : trop de parentheses fermantes'); - - return(null); - } - - return($tree); - } - break; - case "TOK_LP": - if ($inquote) { - // quand on est entre guillements les tokens perdent leur signification - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if ( ! $tree) { - return(null); - } - } else { // '(' : appel récursif - if ( ! $tree) - $tree = $this->maketree($depth + 1); - else { - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - $tree["RB"] = $this->maketree($depth + 1); - if ( ! $tree["RB"]) - $tree = null; - } else { - // ici on applique l'opérateur par défaut - $tree = array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => $this->maketree($depth + 1)); - } - } - if ( ! $tree) { - return(null); - } - } - break; - case "TOK_VOID": - // ce token est entre guillemets : on le saute - break; - case "TOK_QUOTE": - // une expr entre guillemets est 'comme entre parenthéses', - // sinon "a b" OU "x y" -> (((a B0 b) OU x) B0 y) au lieu de - // "a b" OU "x y" -> ((a B0 b) OU (x B0 y)) - if ($inquote) { - if ($this->debug) { - print("CLOSING QUOTE!\n"); - } - // fermeture des guillemets -> retour de récursivité - if ($depth <= 0) { // ')' : retour de récursivité - print("\nguillemets fermants en trop
"); - - return(null); - } - - return($tree); - } else { - if ($this->debug) { - print("OPENING QUOTE!
"); - } - // ouverture des guillemets -> récursivité - if ( ! $tree) - $tree = $this->maketree($depth + 1, true); - else { - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - $tree["RB"] = $this->maketree($depth + 1, true); - if ( ! $tree["RB"]) - $tree = null; - } else { - // ici on applique l'opérateur par défaut - $tree = array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => $this->maketree($depth + 1, true)); - } - } - if ( ! $tree) { - return(null); - } - } - break; - default: - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if ($this->debug) { - print("---- après addtotree ----\n"); - var_dump($tree); - print("-------------------------\n"); - } - if ( ! $tree) { - return(null); - } - break; - } - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendu apres %s'), $tree["VALUE"]); - $tree = $tree["LB"]; - } - - return($tree); - } - - public function addtotree($tree, $t, $depth, $inquote) - { - if ($this->debug) { - printf("addtotree({tree}, \$t[CLASS]='%s', \$t[VALUE]='%s', \$depth=%d, inquote=%s)\n", $t["CLASS"], $t["VALUE"], $depth, $inquote ? "true" : "false"); - print("---- avant addtotree ----\n"); - var_dump($tree); - print("-------------------------\n"); - } - - if ( ! $t) { - return($tree); - } - - switch ($t["CLASS"]) { - case "TOK_CONTEXT": -// if($this->debug) -// { -// printf("addtotree({tree}, \$t='%s', \$depth=%d, inquote=%s)\n", $t["VALUE"], $depth, $inquote?"true":"false"); -// } - if ($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE") { - // un [xxx] suit un terme : il introduit un contexte - $tree["CONTEXT"] = $t["VALUE"]; - } elseif ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ( ! isset($tree["RB"]) || ! $tree["RB"]) { - // un [xxx] peut suivre un opérateur, c'est un paramétre normalement numérique - $tree["PNUM"] = $t["VALUE"]; - } else { - // [xxx] suit un terme déjé en branche droite ? (ex: a ou b[k]) - if ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") - $tree["RB"]["CONTEXT"] = $t["VALUE"]; - else { - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"]); - - return(null); - } - } - } else { - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"]); - - return(null); - } - - return($tree); - break; - case "TOK_CMP": - // < > <= >= <> = : sont des opérateurs de comparaison - if ( ! $tree) { - // printf("\nUne question ne peut commencer par '" . $t["VALUE"] . "'
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'), $t["VALUE"]); - - return(null); - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - // printf("'" . $t["VALUE"] . "' ne peut suivre un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, ne peut suivre un operateur : %s'), $t["VALUE"]); - - return(null); - } - - return(array("CLASS" => "OPK", "VALUE" => $t["VALUE"], "NODETYPE" => $this->opk[$t["VALUE"]]["NODETYPE"], "PNUM" => null, "DEPTH" => $depth, "LB" => $tree, "RB" => null)); - break; - case "TOK_WORD": - if ($t["CLASS"] == "TOK_WORD" && isset($this->ops[$t["VALUE"]]) && ! $inquote) { - // ce mot est un opérateur phrasea - if ( ! $tree) { - // printf("\n581 : Une question ne peut commencer par un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'), $t["VALUE"]); - - return(null); - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - - // printf("\n586 : Un opérateur ne peut suivre un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, %s ne peut suivre un operateur'), $t["VALUE"]); - - return(null); - } - $pnum = null; - if ($this->ops[$t["VALUE"]]["CANNUM"]) { - // cet opérateur peut étre suivi d'un nombre ('near', 'before', 'after') - if ($tn = $this->nexttoken()) { - if ($tn["CLASS"] == "TOK_WORD" && is_numeric($tn["VALUE"])) - $pnum = (int) $tn["VALUE"]; - else - $this->ungettoken($tn["VALUE"]); - } - } - - return(array("CLASS" => "OPS", "VALUE" => $t["VALUE"], "NODETYPE" => $this->ops[$t["VALUE"]]["NODETYPE"], "PNUM" => $pnum, "DEPTH" => $depth, "LB" => $tree, "RB" => null)); - } else { - // ce mot n'est pas un opérateur - $pnum = null; - $nodetype = PHRASEA_KEYLIST; - if ($t["CLASS"] == "TOK_WORD" && isset($this->spw[$t["VALUE"]]) && ! $inquote) { - // mais c'est un mot 'spécial' de phrasea ('last', 'all') - $type = $this->spw[$t["VALUE"]]["CLASS"]; - $nodetype = $this->spw[$t["VALUE"]]["NODETYPE"]; - if ($this->spw[$t["VALUE"]]["CANNUM"]) { - // 'last' peut étre suivi d'un nombre - if ($tn = $this->nexttoken()) { - if ($tn["CLASS"] == "TOK_WORD" && is_numeric($tn["VALUE"])) - $pnum = (int) $tn["VALUE"]; - else - $this->ungettoken($tn["VALUE"]); - } - } - } else { - //printf("sdfsdfsdfsd
"); - $type = $inquote ? "QSIMPLE" : "SIMPLE"; - } - - return($this->addsimple($t, $type, $nodetype, $pnum, $tree, $depth)); - } - break; - } - } - - public function addsimple($t, $type, $nodetype, $pnum, $tree, $depth) - { - $nok = 0; - $registry = \registry::get_instance(); - $w = $t["VALUE"]; - if ($w != "?" && $w != "*") { // on laisse passer les 'isolés' pour les traiter plus tard comme des mots vides - for ($i = 0; $i < strlen($w); $i ++ ) { - $c = substr($w, $i, 1); - if ($c == "?" || $c == "*") { - if ($nok < $registry->get('GV_min_letters_truncation')) { - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= _('qparser:: Formulation incorrecte, necessite plus de caractere : ') . "
" . $registry->get('GV_min_letters_truncation'); - - return(null); - } - // $nok = 0; - } else - $nok ++; - } - } - if ( ! $tree) { - return(array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth)); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - if ($type == "SIMPLE" || $type == "QSIMPLE") - $tree["VALUE"][] = $t["VALUE"]; - else { - $tree = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth)); - } - - return($tree); - case "OPS": - case "OPK": - if ($tree["RB"] == null) { - $tree["RB"] = array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth); - - return($tree); - } else { - if (($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") && $tree["RB"]["DEPTH"] == $depth) { - $tree["RB"]["VALUE"][] = $t["VALUE"]; - - return($tree); - } - if (($tree["RB"]["CLASS"] == "PHRASEA_KW_LAST" || $tree["RB"]["CLASS"] == "PHRASEA_KW_ALL") && $tree["RB"]["DEPTH"] == $depth) { - $tree["RB"] = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree["RB"], - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth)); - - return($tree); - } - - return(array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth) - )); - } - case "PHRASEA_KW_LAST": - case "PHRASEA_KW_ALL": - return(array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth))); - } - } - - public function ungettoken($s) - { - $this->phq = $s . " " . $this->phq; - } - - public function nexttoken($inquote = false) - { - if ($this->phq == "") { - return(null); - } - - switch ($c = substr($this->phq, 0, 1)) { - case "<": - case ">": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $c2 = $c . substr($this->phq, 1, 1); - if ($c2 == "<=" || $c2 == ">=" || $c2 == "<>") { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 2, 99999, 'UTF-8'), 'UTF-8'); - $c = $c2; - } else { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - } - - return(array("CLASS" => "TOK_CMP", "VALUE" => $c)); - break; - case "=": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_CMP", "VALUE" => "=")); - break; - case ":": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_CMP", "VALUE" => ":")); - break; - case "(": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_LP", "VALUE" => "(")); - break; - case ")": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_RP", "VALUE" => ")")); - break; - case "[": - // if($inquote) - // { - // $this->phq = ltrim(substr($this->phq, 1)); - // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); - // } - // un '[' introduit un contexte qu'on lit jusqu'au ']' - $closeb = mb_strpos($this->phq, "]", 1, 'UTF-8'); - if ($closeb !== false) { - $context = $this->mb_trim(mb_substr($this->phq, 1, $closeb - 1, 'UTF-8'), 'UTF-8'); - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $closeb + 1, 99999, 'UTF-8'), 'UTF-8'); - } else { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - $this->phq = ""; - } - $context = $this->unicode->remove_indexer_chars($context); - - return(array("CLASS" => "TOK_CONTEXT", "VALUE" => $context)); - break; - /* - case "]": - // if($inquote) - // { - // $this->phq = ltrim(substr($this->phq, 1)); - // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); - // } - $this->phq = ltrim(substr($this->phq, 1)); - - return(array("CLASS"=>"TOK_RB", "VALUE"=>"]")); - break; - */ - case "\"": - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_QUOTE", "VALUE" => "\"")); - break; - default: - $l = mb_strlen($this->phq, 'UTF-8'); - $t = ""; - $c_utf8 = ""; - for ($i = 0; $i < $l; $i ++ ) { - if ( ! $this->unicode->has_indexer_bad_char(($c_utf8 = mb_substr($this->phq, $i, 1, 'UTF-8')))) { - // $c = mb_strtolower($c); - // $t .= isset($this->noaccent[$c]) ? $this->noaccent[$c] : $c; - $t .= $this->unicode->remove_diacritics(mb_strtolower($c_utf8)); - } else - break; - } -// if ($c_utf8 == "(" || $c_utf8 == ")" || $c_utf8 == "[" || $c_utf8 == "]" || $c_utf8 == "=" || $c_utf8 == ":" || $c_utf8 == "<" || $c_utf8 == ">" || $c_utf8 == "\"") - if (in_array($c_utf8, array("(", ")", "[", "]", "=", ":", "<", ">", "\""))) { - // ces caractéres sont des délimiteurs avec un sens, il faut les garder - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i, 99999, 'UTF-8'), 'UTF-8'); - } else { - // le délimiteur était une simple ponctuation, on le saute - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i + 1, 99999, 'UTF-8'), 'UTF-8'); - } - if ($t != "") { - return(array("CLASS" => "TOK_WORD", "VALUE" => $t)); - } else { - return(array("CLASS" => "TOK_VOID", "VALUE" => $t)); - } - break; - } - } -} - diff --git a/lib/Alchemy/Phrasea/SearchEngine/SearchEngineInterface.php b/lib/Alchemy/Phrasea/SearchEngine/SearchEngineInterface.php index 3a596852c4..6cbfd1af44 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/SearchEngineInterface.php +++ b/lib/Alchemy/Phrasea/SearchEngine/SearchEngineInterface.php @@ -15,6 +15,8 @@ use Alchemy\Phrasea\SearchEngine\SearchEngineOptions; use Alchemy\Phrasea\SearchEngine\SearchEngineResult; use Alchemy\Phrasea\Exception\RuntimeException; use Doctrine\Common\Collections\ArrayCollection; +use Silex\Application; +use Symfony\Component\HttpFoundation\Request; interface SearchEngineInterface { diff --git a/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch.php b/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch.php deleted file mode 100644 index c9d42bc8c6..0000000000 --- a/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch.php +++ /dev/null @@ -1,689 +0,0 @@ -options = new SearchEngineOptions(); - - $this->sphinx = new \SphinxClient(); - - $this->sphinx->SetServer($host, $port); - $this->sphinx->SetArrayResult(true); - $this->sphinx->SetConnectTimeout(1); - - try { - $this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port)); - } catch (\PDOException $e) { - $this->rt_conn = null; - } - - return $this; - } - - public function status() - { - $status = $this->sphinx->Status(); - - if (false === $status) { - throw new Exception(_('Sphinx server is offline')); - } - - if (null === $this->rt_conn) { - throw new RuntimeException('Unable to connect to sphinx rt'); - } - - return $status; - } - - public function availableTypes() - { - return array(self::GEM_TYPE_RECORD, self::GEM_TYPE_STORY); - } - - public function addRecord(\record_adapter $record) - { - $all_datas = array(); - - foreach ($record->get_caption()->get_fields(null, true) as $field) { - if ( ! $field->is_indexable()) { - continue; - } - - $all_datas[] = $field->get_serialized_values(); - - foreach ($field->get_values() as $value) { - - $this->rt_conn->exec("REPLACE INTO " - . "metas_realtime" . $this->CRCdatabox($record->get_databox()) . " VALUES ( - '" . $value->getId() . "' - ,'" . str_replace("'", "\'", $value->getValue()) . "' - ,'" . $value->getDatabox_field()->get_id() . "' - ," . $record->get_record_id() . " - ," . $record->get_sbas_id() . " - ," . $record->get_collection()->get_coll_id() . " - ," . (int) $record->is_grouping() . " - ," . crc32($record->get_sbas_id() . '_' . $value->getDatabox_field()->get_id()) . " - ," . crc32($record->get_sbas_id() . '_' . $record->get_collection()->get_coll_id()) . " - ," . crc32($record->get_sbas_id() . '_' . $record->get_record_id()) . " - ," . crc32($record->get_type()) . " - ,0 - ," . (int) $value->getDatabox_field()->isBusiness() . " - ," . crc32($record->get_collection()->get_coll_id() . '_' . (int) $value->getDatabox_field()->isBusiness()) . " - ," . $record->get_creation_date()->format('U') . " )"); - } - } - - $this->rt_conn->exec("REPLACE INTO " - . "docs_realtime" . $this->CRCdatabox($record->get_databox()) . " VALUES ( - '" . $record->get_record_id() . "' - ,'" . str_replace("'", "\'", implode(' ', $all_datas)) . "' - ," . $record->get_record_id() . " - ," . $record->get_sbas_id() . " - ," . $record->get_collection()->get_coll_id() . " - ," . (int) $record->is_grouping() . " - ," . crc32($record->get_sbas_id() . '_' . $record->get_collection()->get_coll_id()) . " - ," . crc32($record->get_sbas_id() . '_' . $record->get_record_id()) . " - ," . crc32($record->get_type()) . " - ,0 - ," . $record->get_creation_date()->format('U') . " )"); - } - - public function removeRecord(\record_adapter $record) - { - $CRCdatabox = $this->CRCdatabox($record->get_databox()); - $indexes = array( - "metadatas" . $CRCdatabox, - "metadatas" . $CRCdatabox . "_stemmed_en", - "metadatas" . $CRCdatabox . "_stemmed_fr", - ); - - foreach ($record->get_caption()->get_fields(null, true) as $field) { - - foreach ($field->get_values() as $value) { - - foreach ($indexes as $index) { - $this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1))); - } - - $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId()); - } - } - - $indexes = array( - "documents" . $CRCdatabox, - "documents" . $CRCdatabox . "_stemmed_fr", - "documents" . $CRCdatabox . "_stemmed_en" - ); - - foreach ($indexes as $index) { - $this->sphinx->UpdateAttributes($index, array("deleted"), array($record->get_record_id() => array(1))); - } - - $this->rt_conn->exec("DELETE FROM docs_realtime" . $CRCdatabox . " WHERE id = " . $record->get_record_id()); - } - - public function updateRecord(\record_adapter $record) - { - $this->removeRecord($record); - $this->addRecord($record); - } - - public function addStory(\record_adapter $record) - { - return $this->addRecord($record); - } - - public function removeStory(\record_adapter $record) - { - return $this->removeRecord($record); - } - - public function updateStory(\record_adapter $record) - { - return $this->updateRecord($record); - } - - public function addFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Sphinx Search Engine'); - } - - public function removeFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Sphinx Search Engine'); - } - - public function updateFeedEntry(\Feed_Entry_Adapter $entry) - { - throw new RuntimeException('Feed Entry indexing not supported by Sphinx Search Engine'); - } - - public function setOptions(SearchEngineOptions $options) - { - $this->options = $options; - $this->applyOptions($options); - } - - public function resetOptions() - { - $this->options = new SearchEngineOptions(); - $this->resetSphinx(); - } - - private function resetSphinx() - { - $this->sphinx->ResetGroupBy(); - $this->sphinx->ResetFilters(); - $this->sphinx->ResetOverrides(); - } - - public function query($query, $offset, $perPage) - { - assert(is_int($offset)); - assert($offset >= 0); - assert(is_int($perPage)); - - $query = $this->parseQuery($query); - - $preg = preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); - - if ($preg > 0) { - $this->sphinx->SetFilter('record_id', array($matches[1])); - $query = ''; - } - - $this->sphinx->SetLimits($offset, $perPage); - $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); - - $index = $this->getQueryIndex($query); - $res = $this->sphinx->Query($query, $index); - - $results = new ArrayCollection(); - - if ($res === false) { - if ($this->sphinx->IsConnectError() === true) { - $error = _('Sphinx server is offline'); - } else { - $error = $this->sphinx->GetLastError(); - } - $warning = $this->sphinx->GetLastWarning(); - - $total = $available = $duration = 0; - $suggestions = $propositions = array(); - } else { - $error = $res['error']; - $warning = $res['warning']; - - $duration = $res['time']; - $total = $res['total_found']; - $available = $res['total']; - - $resultOffset = $offset; - - if (isset($res['matches'])) { - foreach ($res['matches'] as $record_id => $match) { - try { - $record = - new \record_adapter( - $match['attrs']['sbas_id'] - , $match['attrs']['record_id'] - , $resultOffset - ); - - $results->add($record); - } catch (Exception $e) { - - } - $resultOffset ++; - } - } - - $suggestions = $this->getSuggestions($query); - $propositions = array(); - } - - return new SearchEngineResult($results, $query, $duration, $offset, $available, $total, $error, $warning, $suggestions, $propositions, $index); - } - - public function autocomplete($query) - { - $words = explode(" ", $this->cleanupQuery($query)); - - return $this->getSuggestions(array_pop($words)); - } - - public function excerpt($query, $fields, \record_adapter $record) - { - $index = ''; - // in this case search is done on metas - if ($this->options->fields() || $this->options->businessFieldsOn()) { - if ($this->options->stemmed() && $this->options->getLocale()) { - $index = 'metadatas' . $this->CRCdatabox($record->get_databox()) . '_stemmed_' . $this->options->getLocale(); - } else { - $index = 'metadatas' . $this->CRCdatabox($record->get_databox()); - } - } else { - if ($this->options->stemmed()) { - $index = 'documents' . $this->CRCdatabox($record->get_databox()) . '_stemmed_' . $this->options->getLocale(); - } else { - $index = 'documents' . $this->CRCdatabox($record->get_databox()); - } - } - - $opts = array( - 'before_match' => "", - 'after_match' => "", - ); - - $fields_to_send = array(); - - foreach ($fields as $k => $f) { - $fields_to_send[$k] = $f['value']; - } - - return $this->sphinx->BuildExcerpts($fields_to_send, $index, $query, $opts); - } - - public function resetCache() - { - return $this; - } - - /** - * Reset sphinx client and apply the options - * - * Only apply filters and group by - * - * @param SearchEngineOptions $options - * @return SphinxSearch - */ - protected function applyOptions(SearchEngineOptions $options) - { - $this->resetSphinx(); - - $filters = array(); - - foreach ($options->collections() as $collection) { - $filters[] = crc32($collection->get_databox()->get_sbas_id() . '_' . $collection->get_coll_id()); - } - - $this->sphinx->SetFilter('crc_sbas_coll', $filters); - - $this->sphinx->SetFilter('deleted', array(0)); - $this->sphinx->SetFilter('parent_record_id', array($options->searchType())); - - - if ($options->fields()) { - - $filters = array(); - foreach ($options->fields() as $field) { - $filters[] = crc32($field->get_databox()->get_sbas_id() . '_' . $field->get_id()); - } - - $this->sphinx->SetFilter('crc_struct_id', $filters); - } - - if ($options->businessFieldsOn()) { - - $crc_coll_business = array(); - - foreach ($options->businessFieldsOn() as $collection) { - $crc_coll_business[] = crc32($collection->get_coll_id() . '_1'); - $crc_coll_business[] = crc32($collection->get_coll_id() . '_0'); - } - - $non_business = array(); - - foreach ($options->collections() as $collection) { - foreach ($options->businessFieldsOn() as $BFcollection) { - if ($collection->get_base_id() == $BFcollection->get_base_id()) { - continue 2; - } - } - $non_business[] = $collection; - } - - foreach ($non_business as $collection) { - $crc_coll_business[] = crc32($collection->get_coll_id() . '_0'); - } - - $this->sphinx->SetFilter('crc_coll_business', $crc_coll_business); - } elseif ($options->fields()) { - $this->sphinx->SetFilter('business', array(0)); - } - - /** - * @todo : enhance : check status in a better way - */ - foreach ($options->databoxes() as $databox) { - $status_opts = $options->getStatus(); - foreach ($databox->get_statusbits() as $n => $status) { - if ( ! array_key_exists($n, $status_opts)) - continue; - if ( ! array_key_exists($databox->get_sbas_id(), $status_opts[$n])) - continue; - $crc = crc32($databox->get_sbas_id() . '_' . $n); - $this->sphinx->SetFilter('status', array($crc), ($status_opts[$n][$databox->get_sbas_id()] == '0')); - } - } - - - if ($options->getRecordType()) { - $this->sphinx->SetFilter('crc_type', array(crc32($options->getRecordType()))); - } - - - $order = ''; - switch ($options->sortOrder()) { - case SearchEngineOptions::SORT_MODE_ASC: - $order = 'ASC'; - break; - case SearchEngineOptions::SORT_MODE_DESC: - default: - $order = 'DESC'; - break; - } - - switch ($options->sortBy()) { - case SearchEngineOptions::SORT_RANDOM: - $sort = '@random'; - break; - case SearchEngineOptions::SORT_RELEVANCE: - default: - $sort = '@relevance ' . $order . ', created_on ' . $order; - break; - case SearchEngineOptions::SORT_CREATED_ON: - $sort = 'created_on ' . $order; - break; - } - - $this->sphinx->SetGroupBy('crc_sbas_record', SPH_GROUPBY_ATTR, $sort); - - return $this; - } - - /** - * Return unique integer key for a databox - * - * @param \databox $databox - * @return int - */ - private function CRCdatabox(\databox $databox) - { - return crc32( - str_replace( - array('.', '%') - , '_' - , sprintf('%s_%s_%s_%s', $databox->get_host(), $databox->get_port(), $databox->get_user(), $databox->get_dbname()) - ) - ); - } - - /** - * Remove all keywords, operators, quotes from a query string - * - * @param string $query - * @return string - */ - private function cleanupQuery($query) - { - return str_replace(array("all", "last", "et", "ou", "sauf", "and", "or", "except", "in", "dans", "'", '"', "(", ")", "_", "-", "+"), ' ', $query); - } - - /** - * Return a collection of suggestion corresponding a query - * - * @param string $query - * @return ArrayCollection An array collection of SearchEngineSuggestion - */ - private function getSuggestions($query) - { - // First we split the query into simple words - $words = explode(" ", $this->cleanupQuery(mb_strtolower($query))); - - $tmpWords = array(); - - foreach ($words as $word) { - if (trim($word) === '') { - continue; - } - $tmpWords[] = $word; - } - - $words = array_unique($tmpWords); - - $altVersions = array(); - - // As we got words, we look for alternate word for each of them - if (function_exists('enchant_broker_init') && $this->options->getLocale()) { - $broker = enchant_broker_init(); - if (enchant_broker_dict_exists($broker, $this->options->getLocale())) { - $dictionnary = enchant_broker_request_dict($broker, $this->options->getLocale()); - - foreach ($words as $word) { - - if (enchant_dict_check($dictionnary, $word) == false) { - $suggs = array_merge(array($word), enchant_dict_suggest($dictionnary, $word)); - } - - $altVersions[$word] = array_unique($suggs); - } - enchant_broker_free_dict($dictionnary); - } - enchant_broker_free($broker); - } - - /** - * @todo enhance the trigramm query, as it could be sent in one batch - */ - foreach ($altVersions as $word => $versions) { - $altVersions[$word] = array_unique(array_merge($versions, $this->get_sugg_trigrams($word))); - } - - // We now build an array of all possibilities based on the original query - $queries = array($query); - - foreach ($altVersions as $word => $versions) { - $tmp_queries = array(); - foreach ($versions as $version) { - foreach ($queries as $alt_query) { - $tmp_queries[] = $alt_query; - $tmp_queries[] = str_replace($word, $version, $alt_query); - } - $tmp_queries[] = str_replace($word, $version, $query); - } - $queries = array_unique(array_merge($queries, $tmp_queries)); - } - - $suggestions = array(); - $max_results = 0; - - foreach ($queries as $alt_query) { - $results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query)); - - if ($results !== false && isset($results['total_found'])) { - if ($results['total_found'] > 0) { - - $max_results = max($max_results, (int) $results['total_found']); - $suggestions[] = new SearchEngineSuggestion($query, $alt_query, (int) $results['total_found']); - } - } - } - - usort($suggestions, array('self', 'suggestionsHitSorter')); - - $tmpSuggestions = new ArrayCollection(); - foreach ($suggestions as $key => $suggestion) { - if ($suggestion->hits() < ($max_results / 100)) { - continue; - } - $tmpSuggestions->add($suggestion); - } - - return $tmpSuggestions; - } - - private static function suggestionsHitSorter(SearchEngineSuggestion $a, SearchEngineSuggestion $b) - { - if ($a->hits() == $b->hits()) { - return 0; - } - - return ($a->hits() > $b->hits()) ? -1 : 1; - } - - private function BuildTrigrams($keyword) - { - $t = "__" . $keyword . "__"; - - $trigrams = ""; - for ($i = 0; $i < strlen($t) - 2; $i ++ ) { - $trigrams .= substr($t, $i, 3) . " "; - } - - return $trigrams; - } - - private function get_sugg_trigrams($word) - { - $trigrams = $this->BuildTrigrams($word); - $query = "\"$trigrams\"/1"; - $len = strlen($word); - - $this->resetSphinx(); - - $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); - $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT); - $this->sphinx->SetFilterRange("len", $len - 2, $len + 4); - - $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC"); - $this->sphinx->SetLimits(0, 10); - - $indexes = array(); - - foreach ($this->options->databoxes() as $databox) { - $indexes[] = 'suggest' . $this->CRCdatabox($databox); - } - - $index = implode(',', $indexes); - - $res = $this->sphinx->Query($query, $index); - - if ($this->sphinx->Status() === false) { - return array(); - } - - if ( ! $res || ! isset($res["matches"])) { - return array(); - } - - $this->sphinx->ResetGroupBy(); - $this->sphinx->ResetFilters(); - - $words = array(); - foreach ($res["matches"] as $match) { - $words[] = $match['attrs']['keyword']; - } - - $this->applyOptions($this->options); - - return $words; - } - - private function getQueryIndex($query) - { - $index = '*'; - - $index_keys = array(); - - foreach ($this->options->databoxes() as $databox) { - $index_keys[] = $this->CRCdatabox($databox); - } - - if (count($index_keys) > 0) { - if ($this->options->fields() || $this->options->businessFieldsOn()) { - if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { - $index = ', metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale(); - } else { - $index = 'metadatas' . implode(',metadatas', $index_keys); - } - $index .= ', metas_realtime' . implode(', metas_realtime', $index_keys); - } else { - if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { - $index .= ', documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale(); - } else { - $index .= 'documents' . implode(', documents', $index_keys); - } - $index .= ', docs_realtime' . implode(', docs_realtime', $index_keys); - } - } - - return $index; - } - - private function parseQuery($query) - { - $query = trim($query); - - while (substr($query, 0, 1) === '(' && substr($query, -1) === ')') { - $query = substr($query, 1, (mb_strlen($query) - 2)); - } - - if ($query == 'all') { - $query = ''; - } - - while (mb_strpos($query, ' ') !== false) { - $query = str_replace(' ', ' ', $query); - } - - - $offset = 0; - while (false !== $pos = mb_strpos($query, '-', $offset)) { - $offset = $pos + 1; - if ($pos === 0) { - continue; - } - if (mb_substr($query, ($pos - 1), 1) !== ' ') { - $query = mb_substr($query, 0, ($pos)) . ' ' . mb_substr($query, $pos + 1); - } - } - - $query = str_ireplace(array(' ou ', ' or '), '|', $query); - $query = str_ireplace(array(' sauf ', ' except '), ' -', $query); - $query = str_ireplace(array(' and ', ' et '), ' +', $query); - - return $query; - } -} - diff --git a/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch/SphinxSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch/SphinxSearchEngine.php index 95caa66f36..a68e887daa 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch/SphinxSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/SphinxSearch/SphinxSearchEngine.php @@ -11,15 +11,16 @@ namespace Alchemy\Phrasea\SearchEngine\SphinxSearch; -use Alchemy\Phrasea\Application; use Alchemy\Phrasea\SearchEngine\SearchEngineInterface; use Alchemy\Phrasea\SearchEngine\SearchEngineOptions; use Alchemy\Phrasea\SearchEngine\SearchEngineResult; use Alchemy\Phrasea\SearchEngine\SearchEngineSuggestion; use Alchemy\Phrasea\Exception\RuntimeException; use Doctrine\Common\Collections\ArrayCollection; -use Symfony\Component\Process\ExecutableFinder; -use Symfony\Component\Process\Process; +use Silex\Application; +use Symfony\Component\HttpFoundation\Request; + +require_once __DIR__ . '/../../../../vendor/sphinx/sphinxapi.php'; class SphinxSearchEngine implements SearchEngineInterface { @@ -29,12 +30,6 @@ class SphinxSearchEngine implements SearchEngineInterface */ protected $sphinx; - /** - * - * @var \SphinxClient - */ - protected $suggestionClient; - /** * * @var \PDO @@ -42,26 +37,19 @@ class SphinxSearchEngine implements SearchEngineInterface protected $rt_conn; protected $configurationPanel; protected $options; - protected $app; - public function __construct(Application $app, $host, $port, $rt_host, $rt_port) + public function __construct($host, $port, $rt_host, $rt_port) { - $this->app = $app; $this->options = new SearchEngineOptions(); $this->sphinx = new \SphinxClient(); + $this->sphinx->SetServer($host, $port); $this->sphinx->SetArrayResult(true); $this->sphinx->SetConnectTimeout(1); - $this->suggestionClient = new \SphinxClient(); - $this->suggestionClient->SetServer($host, $port); - $this->suggestionClient->SetArrayResult(true); - $this->suggestionClient->SetConnectTimeout(1); - try { $this->rt_conn = @new \PDO(sprintf('mysql:host=%s;port=%s;', $rt_host, $rt_port)); - $this->rt_conn->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION); } catch (\PDOException $e) { $this->rt_conn = null; } @@ -71,11 +59,9 @@ class SphinxSearchEngine implements SearchEngineInterface public function status() { - if (false === $this->sphinx->Status()) { - throw new RuntimeException(_('Sphinx server is offline')); - } + $status = $this->sphinx->Status(); - if (false === $this->suggestionClient->Status()) { + if (false === $status) { throw new RuntimeException(_('Sphinx server is offline')); } @@ -83,7 +69,17 @@ class SphinxSearchEngine implements SearchEngineInterface throw new RuntimeException('Unable to connect to sphinx rt'); } - return $this->sphinx->Status(); + return $status; + } + + public function getConfigurationPanel(Application $app, Request $request) + { + return $this->configurationPanel()->get($app, $request); + } + + public function postConfigurationPanel(Application $app, Request $request) + { + return $this->configurationPanel()->post($app, $request); } /** @@ -92,7 +88,7 @@ class SphinxSearchEngine implements SearchEngineInterface */ public function configurationPanel() { - if (!$this->configurationPanel) { + if ( ! $this->configurationPanel) { $this->configurationPanel = new ConfigurationPanel($this); } @@ -106,33 +102,18 @@ class SphinxSearchEngine implements SearchEngineInterface public function addRecord(\record_adapter $record) { - if (!$this->rt_conn) { - throw new RuntimeException('Unable to connect to sphinx real-time index'); - } - $all_datas = array(); - $status = array(); - - $binStatus = strrev($record->get_status()); - - for ($i = 4; $i < 32; $i++) { - if ($binStatus[$i]) { - $status[] = crc32($record->get_databox()->get_sbas_id() . '_' . $i); - } - } foreach ($record->get_caption()->get_fields(null, true) as $field) { - if (!$field->is_indexable()) { + if ( ! $field->is_indexable()) { continue; } - if (!$field->get_databox_field()->isBusiness()) { - $all_datas[] = $field->get_serialized_values(); - } + $all_datas[] = $field->get_serialized_values(); foreach ($field->get_values() as $value) { - $sql = "REPLACE INTO " + $this->rt_conn->exec("REPLACE INTO " . "metas_realtime" . $this->CRCdatabox($record->get_databox()) . " VALUES ( '" . $value->getId() . "' ,'" . str_replace("'", "\'", $value->getValue()) . "' @@ -148,10 +129,7 @@ class SphinxSearchEngine implements SearchEngineInterface ,0 ," . (int) $value->getDatabox_field()->isBusiness() . " ," . crc32($record->get_collection()->get_coll_id() . '_' . (int) $value->getDatabox_field()->isBusiness()) . " - ," . $record->get_creation_date()->format('U') . " - ,(" . implode(',', $status) . ") )"; - - $this->rt_conn->exec($sql); + ," . $record->get_creation_date()->format('U') . " )"); } } @@ -167,18 +145,13 @@ class SphinxSearchEngine implements SearchEngineInterface ," . crc32($record->get_sbas_id() . '_' . $record->get_record_id()) . " ," . crc32($record->get_type()) . " ,0 - ," . $record->get_creation_date()->format('U') . " - ,(" . implode(',', $status) . ") )"); + ," . $record->get_creation_date()->format('U') . " )"); return $this; } public function removeRecord(\record_adapter $record) { - if (!$this->rt_conn) { - throw new RuntimeException('Unable to connect to sphinx real-time index'); - } - $CRCdatabox = $this->CRCdatabox($record->get_databox()); $indexes = array( "metadatas" . $CRCdatabox, @@ -194,7 +167,9 @@ class SphinxSearchEngine implements SearchEngineInterface $this->sphinx->UpdateAttributes($index, array("deleted"), array($value->getId() => array(1))); } - $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId()); + $stmt = $this->rt_conn->exec("DELETE FROM metas_realtime" . $CRCdatabox . " WHERE id = " . $value->getId()); + $stmt->execute(); + $stmt->closeCursor(); } } @@ -265,6 +240,7 @@ class SphinxSearchEngine implements SearchEngineInterface { $this->sphinx->ResetGroupBy(); $this->sphinx->ResetFilters(); + $this->sphinx->ResetOverrides(); } public function query($query, $offset, $perPage) @@ -275,10 +251,10 @@ class SphinxSearchEngine implements SearchEngineInterface $query = $this->parseQuery($query); - $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); + $preg = preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); if ($preg > 0) { - $this->sphinx->SetFilter('record_id', array($matches[2])); + $this->sphinx->SetFilter('record_id', array($matches[1])); $query = ''; } @@ -315,17 +291,16 @@ class SphinxSearchEngine implements SearchEngineInterface try { $record = new \record_adapter( - $this->app, - $match['attrs']['sbas_id'], - $match['attrs']['record_id'], - $resultOffset + $match['attrs']['sbas_id'] + , $match['attrs']['record_id'] + , $resultOffset ); $results->add($record); } catch (Exception $e) { } - $resultOffset++; + $resultOffset ++; } } @@ -354,7 +329,7 @@ class SphinxSearchEngine implements SearchEngineInterface $index = 'metadatas' . $this->CRCdatabox($record->get_databox()); } } else { - if ($this->options->stemmed() && $this->options->getLocale()) { + if ($this->options->stemmed()) { $index = 'documents' . $this->CRCdatabox($record->get_databox()) . '_stemmed_' . $this->options->getLocale(); } else { $index = 'documents' . $this->CRCdatabox($record->get_databox()); @@ -463,12 +438,12 @@ class SphinxSearchEngine implements SearchEngineInterface /** * @todo : enhance : check status in a better way */ - $status_opts = $options->getStatus(); foreach ($options->databoxes() as $databox) { + $status_opts = $options->getStatus(); foreach ($databox->get_statusbits() as $n => $status) { - if (!array_key_exists($n, $status_opts)) + if ( ! array_key_exists($n, $status_opts)) continue; - if (!array_key_exists($databox->get_sbas_id(), $status_opts[$n])) + if ( ! array_key_exists($databox->get_sbas_id(), $status_opts[$n])) continue; $crc = crc32($databox->get_sbas_id() . '_' . $n); $this->sphinx->SetFilter('status', array($crc), ($status_opts[$n][$databox->get_sbas_id()] == '0')); @@ -545,10 +520,6 @@ class SphinxSearchEngine implements SearchEngineInterface $altVersions = array(); - foreach ($words as $word) { - $altVersions[$word] = array($word); - } - // As we got words, we look for alternate word for each of them if (function_exists('enchant_broker_init') && $this->options->getLocale()) { $broker = enchant_broker_init(); @@ -558,7 +529,7 @@ class SphinxSearchEngine implements SearchEngineInterface foreach ($words as $word) { if (enchant_dict_check($dictionnary, $word) == false) { - $suggs = array_merge(enchant_dict_suggest($dictionnary, $word)); + $suggs = array_merge(array($word), enchant_dict_suggest($dictionnary, $word)); } $altVersions[$word] = array_unique($suggs); @@ -595,6 +566,7 @@ class SphinxSearchEngine implements SearchEngineInterface foreach ($queries as $alt_query) { $results = $this->sphinx->Query($alt_query, $this->getQueryIndex($alt_query)); + if ($results !== false && isset($results['total_found'])) { if ($results['total_found'] > 0) { @@ -631,7 +603,7 @@ class SphinxSearchEngine implements SearchEngineInterface $t = "__" . $keyword . "__"; $trigrams = ""; - for ($i = 0; $i < strlen($t) - 2; $i++) { + for ($i = 0; $i < strlen($t) - 2; $i ++ ) { $trigrams .= substr($t, $i, 3) . " "; } @@ -646,12 +618,12 @@ class SphinxSearchEngine implements SearchEngineInterface $this->resetSphinx(); - $this->suggestionClient->SetMatchMode(SPH_MATCH_EXTENDED2); - $this->suggestionClient->SetRankingMode(SPH_RANK_WORDCOUNT); - $this->suggestionClient->SetFilterRange("len", $len - 2, $len + 4); + $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); + $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT); + $this->sphinx->SetFilterRange("len", $len - 2, $len + 4); - $this->suggestionClient->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC"); - $this->suggestionClient->SetLimits(0, 10); + $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC"); + $this->sphinx->SetLimits(0, 10); $indexes = array(); @@ -660,21 +632,27 @@ class SphinxSearchEngine implements SearchEngineInterface } $index = implode(',', $indexes); - $res = $this->suggestionClient->Query($query, $index); - if ($this->suggestionClient->Status() === false) { + $res = $this->sphinx->Query($query, $index); + + if ($this->sphinx->Status() === false) { return array(); } - if (!$res || !isset($res["matches"])) { + if ( ! $res || ! isset($res["matches"])) { return array(); } + $this->sphinx->ResetGroupBy(); + $this->sphinx->ResetFilters(); + $words = array(); foreach ($res["matches"] as $match) { $words[] = $match['attrs']['keyword']; } + $this->applyOptions($this->options); + return $words; } @@ -691,14 +669,14 @@ class SphinxSearchEngine implements SearchEngineInterface if (count($index_keys) > 0) { if ($this->options->fields() || $this->options->businessFieldsOn()) { if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { - $index = 'metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale(); + $index = ', metadatas' . implode('_stemmed_' . $this->options->getLocale() . ', metadatas', $index_keys) . '_stemmed_' . $this->options->getLocale(); } else { $index = 'metadatas' . implode(',metadatas', $index_keys); } $index .= ', metas_realtime' . implode(', metas_realtime', $index_keys); } else { if ($query !== '' && $this->options->stemmed() && $this->options->getLocale()) { - $index = 'documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale(); + $index = ', documents' . implode('_stemmed_' . $this->options->getLocale() . ', documents', $index_keys) . '_stemmed_' . $this->options->getLocale(); } else { $index = 'documents' . implode(', documents', $index_keys); } @@ -743,92 +721,5 @@ class SphinxSearchEngine implements SearchEngineInterface return $query; } - - public function buildSuggestions(array $databoxes, $configuration, $threshold = 10) - { - $executableFinder = new ExecutableFinder(); - $indexer = $executableFinder->find('indexer'); - - if (!is_executable($indexer)) { - throw new RuntimeException('Indexer does not seem to be executable'); - } - - foreach ($databoxes as $databox) { - $tmp_file = tempnam(sys_get_temp_dir(), 'sphinx_sugg'); - - $cmd = $indexer . ' --config ' . $configuration . ' metadatas' . $this->CRCdatabox($databox) - . ' --buildstops ' . $tmp_file . ' 1000000 --buildfreqs'; - $process = new Process($cmd); - $process->run(); - - $sql = 'TRUNCATE suggest'; - $stmt = $databox->get_connection()->prepare($sql); - $stmt->execute(); - $stmt->closeCursor(); - - if (null !== $sql = $this->BuildDictionarySQL(file_get_contents($tmp_file), $threshold)) { - $stmt = $databox->get_connection()->prepare($sql); - $stmt->execute(); - $stmt->closeCursor(); - } - - unlink($tmp_file); - } - - return $this; - } - - protected function BuildDictionarySQL($dictionnary, $threshold) - { - $out = array(); - - $n = 1; - $lines = explode("\n", $dictionnary); - foreach ($lines as $line) { - if (trim($line) === '') { - continue; - } - - list ( $keyword, $freq ) = explode(" ", trim($line)); - - if ($freq < $threshold || strstr($keyword, "_") !== false || strstr($keyword, "'") !== false) { - continue; - } - - if (ctype_digit($keyword)) { - continue; - } - if (mb_strlen($keyword) < 3) { - continue; - } - - $trigrams = $this->BuildTrigrams($keyword); - - $out[] = "( $n, '$keyword', '$trigrams', $freq )"; - $n++; - } - - if ($out) { - return "INSERT INTO suggest VALUES " . implode(",\n", $out) . ";"; - } - - return null; - } - - /** - * @inheritdoc - */ - public function clearCache() - { - return $this; - } - - /** - * @inheritdoc - */ - public function clearAllCache(\DateTime $date = null) - { - return $this; - } }