diff --git a/lib/classes/searchEngine/adapter.class.php b/lib/classes/searchEngine/adapter.class.php deleted file mode 100644 index 8014cf0171..0000000000 --- a/lib/classes/searchEngine/adapter.class.php +++ /dev/null @@ -1,160 +0,0 @@ -get('GV_sphinx')) { - $this->search_engine = new searchEngine_adapter_sphinx_engine($app); - } elseif (function_exists('phrasea_query2')) { - $this->search_engine = new searchEngine_adapter_phrasea_engine($app); - } else { - throw new Exception('No search engine available, try phrasea2 or sphinx'); - } - - return $this; - } - - /** - * - * @param searchEngine_options $options - * @return searchEngine_adapter - */ - public function set_options(searchEngine_options $options) - { - $this->search_options = $options; - $this->search_engine->set_options($options); - - return $this; - } - - /** - * - * @param boolean $boolean - * @return searchEngine_adapter - */ - public function set_is_first_page($boolean) - { - $this->first_page = ! ! $boolean; - - return $this; - } - - /** - * - * @return boolean - */ - public function is_first_page() - { - return $this->first_page; - } - - /** - * - * @return string - */ - public function get_query() - { - return $this->search_engine->get_parsed_query(); - } - - /** - * - * @return searchEngine_adapter - */ - public function reset_cache() - { - $this->search_engine->reset_cache(); - - return $this; - } - - /** - * - * @param string $query - * @param int $page - * @param int $perPage - * @return searchEngine_results - */ - public function query_per_page($query, $page, $perPage) - { - assert(is_int($page)); - assert($page > 0); - assert(is_int($perPage)); - assert($perPage > 0); - $offset = ($page - 1) * $perPage; - - return $this->search_engine->results($query, $offset, $perPage); - } - - /** - * - * @param string $query - * @param int $offset - * @param int $perPage - * @return searchEngine_results - */ - public function query_per_offset($query, $offset, $perPage) - { - assert(is_int($offset)); - assert($offset >= 0); - assert(is_int($perPage)); - assert($perPage > 0); - - return $this->search_engine->results($query, $offset, $perPage); - } - - /** - * - * @return array - */ - public function get_status() - { - return $this->search_engine->get_status(); - } - - public function build_excerpt($query, array $fields, record_adapter $record) - { - return $this->search_engine->build_excerpt($query, $fields, $record); - } -} diff --git a/lib/classes/searchEngine/adapter/abstract.class.php b/lib/classes/searchEngine/adapter/abstract.class.php deleted file mode 100644 index 3c021fef7e..0000000000 --- a/lib/classes/searchEngine/adapter/abstract.class.php +++ /dev/null @@ -1,199 +0,0 @@ -total_available; - } - - /** - * - * @return float - */ - public function get_time() - { - return $this->total_time; - } - - /** - * - * @return string - */ - public function get_error() - { - return $this->error; - } - - /** - * - * @return string - */ - public function get_warning() - { - return $this->warning; - } - - /** - * - * @return string - */ - public function get_propositions() - { - return null; - } - - /** - * - * @return searchEngine_adapter_abstract - */ - public function reset_cache() - { - return $this; - } - - /** - * - * @return int - */ - public function get_per_page() - { - return (int) $this->perPage; - } - - /** - * - * @return int - */ - public function get_total_results() - { - return (int) $this->total_results; - } - - /** - * - * @return int - */ - public function get_total_pages() - { - return (int) ceil($this->get_available_results() / $this->get_per_page()); - } - - /** - * - * @return int - */ - public function get_current_page() - { - return (int) $this->current_page; - } - - /** - * - * @return int - */ - public function get_offset_start() - { - return (int) $this->offset_start; - } - - /** - * - * @return string - */ - public function get_current_indexes() - { - return $this->current_index; - } -} diff --git a/lib/classes/searchEngine/adapter/interface.class.php b/lib/classes/searchEngine/adapter/interface.class.php deleted file mode 100644 index 25b65cee8d..0000000000 --- a/lib/classes/searchEngine/adapter/interface.class.php +++ /dev/null @@ -1,60 +0,0 @@ -app = $app; - return $this; - } - - /** - * - * @param searchEngine_options $options - * @return searchEngine_adapter_phrasea_engine - */ - public function set_options(searchEngine_options $options) - { - $this->options = $options; - - $this->opt_search_type = (int) $options->get_search_type(); - $this->opt_bases = $options->get_bases(); - $this->opt_fields = $options->get_fields(); - $this->opt_date_field = $options->get_date_fields(); - $this->opt_max_date = $options->get_max_date(); - $this->opt_min_date = $options->get_min_date(); - - if (in_array($options->get_record_type(), array('image', 'video', 'audio', 'document', 'flash'))) - $this->opt_record_type = $options->get_record_type(); - - foreach ($options->get_fields() as $field) { - if (trim($field) === 'phraseanet--all--fields') { - $this->opt_fields = array(); - break; - } - } - - $this->opt_status = $options->get_status(); - - return $this; - } - - /** - * - * @param $proposals - * @return string - */ - protected static function proposalsToHTML($proposals) - { - - $html = ''; - $b = true; - foreach ($proposals["BASES"] as $zbase) { - if ((int) (count($proposals["BASES"]) > 1) && count($zbase["TERMS"]) > 0) { - $style = $b ? 'style="margin-top:0px;"' : ''; - $b = false; - $html .= "

" . sprintf(_('reponses::propositions pour la base %s'), $zbase["NAME"]) . "

"; - } - $t = true; - foreach ($zbase["TERMS"] as $path => $props) { - $style = $t ? 'style="margin-top:0px;"' : ''; - $t = false; - $html .= "

" . sprintf(_('reponses::propositions pour le terme %s'), $props["TERM"]) . "

"; - $html .= $props["HTML"]; - } - } - $html .= ''; - - return($html); - } - - /** - * - * @return string - */ - public function get_propositions() - { - if (isset($this->qp['main'])) { - $proposals = self::proposalsToHTML($this->qp['main']->proposals); - if (trim($proposals) !== '') { - return "
" . $this->qp['main']->proposals["QRY"] - . "
" . $proposals . "
"; - } - } - - return null; - } - - /** - * - * @param int $query - * @param int $offset - * @param int $perPage - * @return searchEngine_results - */ - public function results($query, $offset, $perPage) - { - - assert(is_int($offset)); - assert($offset >= 0); - assert(is_int($perPage)); - - $page = floor($offset / $perPage) + 1; - - $this->current_page = $page; - $this->perPage = $perPage; - - $page = $this->get_current_page(); - - if (trim($query) === '') - $query = "all"; - if ($this->opt_record_type != '') { - $query .= ' AND recordtype=' . $this->opt_record_type; - } - - $sql = 'SELECT query, query_time FROM cache WHERE session_id = :ses_id'; - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); - $stmt->execute(array(':ses_id' => $this->app['session']->get('phrasea_session_id'))); - $row = $stmt->fetch(PDO::FETCH_ASSOC); - $stmt->closeCursor(); - - $date_obj = new DateTime('-10 min'); - $date_quest = new DateTime($row['query_time']); - - $reseted = $this->reseted; - - $reseted = false; - if ($this->reseted) - $reseted = true; - if ($query != $row['query']) - $reseted = true; - if ($date_obj > $date_quest) - $reseted = true; - - if ($reseted === true) { - $this->reset_cache(); - self::addQuery($query); - self::query(); - } else { - $this->total_available = $this->total_results = $this->app['session']->get('phraseanet.phrasea_engine_n_results'); - } - - $results = new set_result(); - - $perPage = $this->get_per_page(); - $page = $this->get_current_page(); - $this->offset_start = $courcahnum = (($page - 1) * $perPage); - - $res = phrasea_fetch_results( - $this->app['session']->get('phrasea_session_id'), (int) (($page - 1) * $perPage) + 1, $perPage, false - ); - - $rs = array(); - if (isset($res['results']) && is_array($res['results'])) - $rs = $res['results']; - - foreach ($rs as $irec => $data) { - try { - $sbas_id = phrasea::sbasFromBas($this->app, $data['base_id']); - - $record = new record_adapter( - $this->app, - $sbas_id, - $data['record_id'], - $courcahnum - ); - - $results->add_element($record); - } catch (Exception $e) { - - } - $courcahnum ++; - } - - return new searchEngine_results($results, $this); - } - - /** - * - * @return searchEngine_adapter_phrasea_engine - */ - public function reset_cache() - { - phrasea_clear_cache($this->app['session']->get('phrasea_session_id')); - $this->reseted = true; - - return $this; - } - - /** - * - * @return array - */ - public function get_status() - { - $infos = phrasea_info(); - - $status = array(); - foreach ($infos as $key => $value) { - $status[] = array($key, $value); - } - - return $status; - } - - public function get_suggestions($I18n) - { - $props = array(); - foreach ($this->qp['main']->proposals['QUERIES'] as $prop) { - $props[] = array( - 'value' => $prop - , 'current' => false - , 'hits' => null - ); - } - - return $props; - } - - /** - * - * @return string - */ - public function get_parsed_query() - { - return $this->query; - } - - /** - * - * @return searchEngine_adapter_phrasea_engine - */ - protected function query() - { - $dateLog = date("Y-m-d H:i:s"); - $nbanswers = 0; - - $sql = 'UPDATE cache SET query = :query, query_time = NOW() - WHERE session_id = :ses_id'; - - $params = array( - 'query' => $this->get_parsed_query() - , ':ses_id' => $this->app['session']->get('phrasea_session_id') - ); - - $stmt = $this->app['phraseanet.appbox']->get_connection()->prepare($sql); - $stmt->execute($params); - $stmt->closeCursor(); - - $total_time = 0; - - $sort = ''; - - if ($this->options->get_sortby()) { - switch ($this->options->get_sortord()) { - case searchEngine_options::SORT_MODE_ASC: - $sort = '+'; - break; - case searchEngine_options::SORT_MODE_DESC: - default: - $sort = '-'; - break; - } - $sort .= '0' . $this->options->get_sortby(); - } - - foreach ($this->queries as $sbas_id => $qry) { - $BF = array(); - - foreach ($this->options->get_business_fields() as $base_id) { - $BF[] = phrasea::collFromBas($this->app, $base_id); - } - - $this->results[$sbas_id] = phrasea_query2( - $this->app['session']->get('phrasea_session_id') - , $sbas_id - , $this->colls[$sbas_id] - , $this->arrayq[$sbas_id] - , $this->app['phraseanet.registry']->get('GV_sit') - , (string) $this->app['phraseanet.user']->get_id() - , false - , $this->opt_search_type == 1 ? PHRASEA_MULTIDOC_REGONLY : PHRASEA_MULTIDOC_DOCONLY - , $sort - , $BF - ); - - $total_time += $this->results[$sbas_id]['time_all']; - - if ($this->results[$sbas_id]) - $nbanswers += $this->results[$sbas_id]["nbanswers"]; - - $logger = $this->app['phraseanet.logger']($this->app['phraseanet.appbox']->get_databox($sbas_id)); - - $conn2 = connection::getPDOConnection($this->app, $sbas_id); - - $sql3 = "INSERT INTO log_search - (id, log_id, date, search, results, coll_id ) - VALUES - (null, :log_id, :date, :query, :nbresults, :colls)"; - - $params = array( - ':log_id' => $logger->get_id() - , ':date' => $dateLog - , ':query' => $this->query - , ':nbresults' => $this->results[$sbas_id]["nbanswers"] - , ':colls' => implode(',', $this->colls[$sbas_id]) - ); - - $stmt = $conn2->prepare($sql3); - $stmt->execute($params); - $stmt->closeCursor(); - } - - $this->total_time = $total_time; - - User_Adapter::saveQuery($this->app, $this->query); - - $this->app['session']->set('phraseanet.phrasea_engine_n_results', $nbanswers); - - $this->total_available = $this->total_results = $nbanswers; - - return $this; - } - - /** - * - * @param int $sbas - * @return searchEngine_adapter_phrasea_engine - */ - protected function singleParse($sbas) - { - $this->qp[$sbas] = new searchEngine_adapter_phrasea_queryParser($this->app, $this->app['locale']); - $this->qp[$sbas]->debug = false; - if ($sbas == 'main') - $simple_treeq = $this->qp[$sbas]->parsequery($this->query); - else - $simple_treeq = $this->qp[$sbas]->parsequery($this->queries[$sbas]); - - $this->qp[$sbas]->priority_opk($simple_treeq); - $this->qp[$sbas]->distrib_opk($simple_treeq); - $this->needthesaurus[$sbas] = false; - - $this->indep_treeq[$sbas] = $this->qp[$sbas]->extendThesaurusOnTerms($simple_treeq, true, true, false); - $this->needthesaurus[$sbas] = $this->qp[$sbas]->containsColonOperator($this->indep_treeq[$sbas]); - - return $this; - } - - /** - * - * @param string $query - * @return searchEngine_adapter_phrasea_engine - */ - protected function addQuery($query) - { - $qry = ''; - if (trim($query) != '') { - $qry .= trim($query); - } - - foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) { - foreach ($databox->get_collections() as $coll) { - if (in_array($coll->get_base_id(), $this->opt_bases)) { - $this->queries[$databox->get_sbas_id()] = $qry; - break; - } - } - } - $this->query = $qry; - - foreach ($this->queries as $sbas => $qs) { - if ($sbas === 'main') - continue; - if (count($this->opt_status) > 0) { - $requestStat = 'xxxx'; - - for ($i = 4; ($i <= 64); $i ++ ) { - if ( ! isset($this->opt_status[$i])) { - $requestStat = 'x' . $requestStat; - continue; - } - $set = false; - $val = ''; - if (isset($this->opt_status[$i][$sbas]) && $this->opt_status[$i][$sbas] == '0') { - $set = true; - $val = '0'; - } - if (isset($this->opt_status[$i][$sbas]) && $this->opt_status[$i][$sbas] == '1') { - if ($set) - $val = 'x'; - else - $val = '1'; - } - $requestStat = ( $val != '' ? $val : 'x' ) . $requestStat; - } - $requestStat = trim(ltrim($requestStat, 'x')); - if ($requestStat !== '') - $this->queries[$sbas] .= ' and (recordstatus=' . $requestStat . ')'; - } - if (count($this->opt_fields) > 0) { - $this->queries[$sbas] .= ' dans (' . implode(' ou ', $this->opt_fields) . ')'; - } - if (($this->opt_min_date || $this->opt_max_date) && $this->opt_date_field != '') { - if ($this->opt_min_date) - $this->queries[$sbas] .= ' AND ( ' . implode(' >= ' . $this->opt_min_date->format('Y-m-d') . ' OR ', $this->opt_date_field) . ' >= ' . $this->opt_min_date->format('Y-m-d') . ' ) '; - if ($this->opt_max_date) - $this->queries[$sbas] .= ' AND ( ' . implode(' <= ' . $this->opt_max_date->format('Y-m-d') . ' OR ', $this->opt_date_field) . ' <= ' . $this->opt_max_date->format('Y-m-d') . ' ) '; - } - } - - $this->singleParse('main'); - foreach ($this->queries as $sbas => $qryBas) - $this->singleParse($sbas); - - foreach ($this->app['phraseanet.appbox']->get_databoxes() as $databox) { - if ( ! isset($this->queries[$databox->get_sbas_id()])) - continue; - - $sbas_id = $databox->get_sbas_id(); - $this->colls[$sbas_id] = array(); - foreach ($databox->get_collections() as $coll) { - if (in_array($coll->get_base_id(), $this->opt_bases)) - $this->colls[$sbas_id][] = (int) $coll->get_base_id(); - } - if (sizeof($this->colls[$sbas_id]) <= 0) - continue; - if ($this->needthesaurus[$sbas_id]) { - $domthesaurus = $databox->get_dom_thesaurus(); - - if ($domthesaurus) { - $this->qp[$sbas_id]->thesaurus2($this->indep_treeq[$sbas_id], $sbas_id, $databox->get_dbname(), $domthesaurus, true); - $this->qp['main']->thesaurus2($this->indep_treeq['main'], $sbas_id, $databox->get_dbname(), $domthesaurus, true); - } - } - - if ($this->qp[$sbas_id]->errmsg != "") { - $this->error .= ' ' . $this->qp[$sbas_id]->errmsg; - } - - $emptyw = false; - - $this->qp[$sbas_id]->set_default($this->indep_treeq[$sbas_id], $emptyw); - $this->qp[$sbas_id]->distrib_in($this->indep_treeq[$sbas_id]); - $this->qp[$sbas_id]->factor_or($this->indep_treeq[$sbas_id]); - $this->qp[$sbas_id]->setNumValue($this->indep_treeq[$sbas_id], $databox->get_sxml_structure()); - $this->qp[$sbas_id]->thesaurus2_apply($this->indep_treeq[$sbas_id], $sbas_id); - $this->arrayq[$sbas_id] = $this->qp[$sbas_id]->makequery($this->indep_treeq[$sbas_id]); - $this->results[$sbas_id] = NULL; - } - - return $this; - } - - public function build_excerpt($query, array $fields, record_adapter $record) - { - $ret = array(); - - $res = phrasea_fetch_results( - $this->app['session']->get('phrasea_session_id'), ($record->get_number() + 1), 1, true, "[[em]]", "[[/em]]" - ); - - if ( ! isset($res['results']) || ! is_array($res['results'])) { - return array(); - } - - $rs = $res['results']; - $res = array_shift($rs); - if ( ! isset($res['xml'])) { - return array(); - } - - $sxe = simplexml_load_string($res['xml']); - - foreach ($fields as $name => $field) { - if (false !== $sxe && $sxe->description && $sxe->description->$name) { - $val = array(); - foreach ($sxe->description->$name as $value) { - $val[] = str_replace(array('[[em]]', '[[/em]]'), array('', ''), (string) $value); - } - $separator = $field['separator'] ? $field['separator'][0] : ''; - $val = implode(' ' . $separator . ' ', $val); - } else { - $val = $field['value']; - } - - $ret[] = $val; - } - - return $ret; - } -} diff --git a/lib/classes/searchEngine/adapter/phrasea/queryParser.class.php b/lib/classes/searchEngine/adapter/phrasea/queryParser.class.php deleted file mode 100644 index 53445ad0e4..0000000000 --- a/lib/classes/searchEngine/adapter/phrasea/queryParser.class.php +++ /dev/null @@ -1,1896 +0,0 @@ - array("NODETYPE" => PHRASEA_OP_AND, "CANNUM" => false), - "and" => array("NODETYPE" => PHRASEA_OP_AND, "CANNUM" => false), - "ou" => array("NODETYPE" => PHRASEA_OP_OR, "CANNUM" => false), - "or" => array("NODETYPE" => PHRASEA_OP_OR, "CANNUM" => false), - "sauf" => array("NODETYPE" => PHRASEA_OP_EXCEPT, "CANNUM" => false), - "except" => array("NODETYPE" => PHRASEA_OP_EXCEPT, "CANNUM" => false), - "pres" => array("NODETYPE" => PHRASEA_OP_NEAR, "CANNUM" => true), - "near" => array("NODETYPE" => PHRASEA_OP_NEAR, "CANNUM" => true), - "avant" => array("NODETYPE" => PHRASEA_OP_BEFORE, "CANNUM" => true), - "before" => array("NODETYPE" => PHRASEA_OP_BEFORE, "CANNUM" => true), - "apres" => array("NODETYPE" => PHRASEA_OP_AFTER, "CANNUM" => true), - "after" => array("NODETYPE" => PHRASEA_OP_AFTER, "CANNUM" => true), - "dans" => array("NODETYPE" => PHRASEA_OP_IN, "CANNUM" => false), - "in" => array("NODETYPE" => PHRASEA_OP_IN, "CANNUM" => false) - ); - public $opk = array( - "<" => array("NODETYPE" => PHRASEA_OP_LT, "CANNUM" => false), - ">" => array("NODETYPE" => PHRASEA_OP_GT, "CANNUM" => false), - "<=" => array("NODETYPE" => PHRASEA_OP_LEQT, "CANNUM" => false), - ">=" => array("NODETYPE" => PHRASEA_OP_GEQT, "CANNUM" => false), - "<>" => array("NODETYPE" => PHRASEA_OP_NOTEQU, "CANNUM" => false), - "=" => array("NODETYPE" => PHRASEA_OP_EQUAL, "CANNUM" => false), - ":" => array("NODETYPE" => PHRASEA_OP_COLON, "CANNUM" => false) - ); - public $spw = array( - "all" => array( - "CLASS" => "PHRASEA_KW_ALL", "NODETYPE" => PHRASEA_KW_ALL, "CANNUM" => false - ), - "last" => array( - "CLASS" => "PHRASEA_KW_LAST", "NODETYPE" => PHRASEA_KW_LAST, "CANNUM" => true - ), - // "first" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), - // "premiers" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), - "tout" => array( - "CLASS" => "PHRASEA_KW_ALL", "NODETYPE" => PHRASEA_KW_ALL, "CANNUM" => false - ), - "derniers" => array( - "CLASS" => "PHRASEA_KW_LAST", "NODETYPE" => PHRASEA_KW_LAST, "CANNUM" => true - ) - ); - public $quoted_defaultop = array( - "VALUE" => "default_avant", "NODETYPE" => PHRASEA_OP_BEFORE, "PNUM" => 0 - ); - public $defaultop = array( - "VALUE" => "and", "NODETYPE" => PHRASEA_OP_AND, "PNUM" => NULL - ); - public $defaultlast = 12; - public $phq; - public $errmsg = ""; - - /** - * - * @var boolean - */ - public $debug = false; - - /** - * un tableau qui contiendra des propositions de thesaurus - * pour les termes de l'arbre simple - * - * @var array - */ - public $proposals = Array("QRY" => "", "BASES" => array(), "QUERIES" => array()); - - /** - * Current language for thesaurus - * @var - */ - public $lng = null; - protected $unicode; - protected $app; - - public function __construct(Application $app, $lng = "???") - { - $this->app = $app; - $this->lng = $lng; - $this->unicode = new unicode(); - - return $this; - } - - public function mb_trim($s, $encoding) - { - return(trim($s)); - } - - public function mb_ltrim($s, $encoding) - { - return(ltrim($s)); - } - - public function parsequery($phq) - { - if ($this->debug) { - for ($i = 0; $i < mb_strlen($phq, 'UTF-8'); $i++) { - $c = mb_substr($phq, $i, 1, 'UTF-8'); - printf("// %s : '%s' (%d octets)\n", $i, $c, strlen($c)); - } - } - - $this->proposals = Array("QRY" => "", "BASES" => array(), "QUERIES" => array()); - $this->phq = $this->mb_trim($phq, 'UTF-8'); - if ($this->phq != "") { - return($this->maketree(0)); - } else { - - if ($this->errmsg != "") { - $this->errmsg .= sprintf("\\n"); - } - - $this->errmsg .= _('qparser::la question est vide'); - - return(null); - } - } - - public function astext($tree) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) { - return(implode(" ", $tree["VALUE"])); - } else { - - return($tree["VALUE"]); - } - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) { - return("\"" . implode(" ", $tree["VALUE"]) . "\""); - } else { - return("\"" . $tree["VALUE"] . "\""); - } - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext($tree["RB"]) . ")"); - } else { - return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . " " . $this->astext($tree["RB"]) . ")"); - } - break; - } - } - - public function astable(&$tree) - { - $this->calc_complexity($tree); - $txt = ""; - $this->astable2($txt, $tree); - $txt = "\n\n" . $txt . "\n
\n"; - - return($txt); - } - - public function calc_complexity(&$tree) - { - if ($tree) { - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - return($tree["COMPLEXITY"] = $this->calc_complexity($tree["LB"]) + $this->calc_complexity($tree["RB"])); - } else { - return($tree["COMPLEXITY"] = 1); - } - } - } - - public function astable2(&$out, &$tree, $depth = 0) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $txt = implode(" ", $tree["VALUE"]); - else - $txt = $tree["VALUE"]; - $out .= "\t" . $txt . "\n"; - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) - $txt = implode(" ", $tree["VALUE"]); - else - $txt = $tree["VALUE"]; - $out .= "\t"" . $txt . ""\n"; - break; - case "PHRASEA_KW_ALL": - $out .= "\t" . $tree["VALUE"][0] . "\n"; - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - $out .= "\t" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]" . "\n"; - else - $out .= "\t" . $tree["VALUE"][0] . "\n"; - break; - case "OPS": - case "OPK": - $op = $tree["VALUE"]; - if (isset($tree["PNUM"])) - $op .= "[" . $tree["PNUM"] . "]"; - $out .= "\t$op\n"; - $this->astable2($out, $tree["LB"], $depth + 1); - $this->astable2($out, $tree["RB"], $depth + 1); - $out .= "\n\n"; - break; - } - } - - public function dumpDiv(&$tree) - { - print("
\n"); - $this->dumpDiv2($tree); - print("
\n"); - } - - public function dumpDiv2(&$tree, $depth = 0) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(str_repeat("\t", $depth) . "" . $s . "\n"); - case "QSIMPLE": - $s = ""; - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(str_repeat("\t", $depth) . """ . $s . ""\n"); - break; - case "PHRASEA_KW_ALL": - printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - printf(str_repeat("\t", $depth) . "%s %s\n", $tree["VALUE"][0], $tree["PNUM"]); - else - printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); - break; - // case PHRASEA_KW_FIRST: - // if($tree["PNUM"]!==null) - // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); - // else - // printf("%s", $tree["VALUE"]); - // break; - case "OPS": - case "OPK": - print(str_repeat("\t", $depth) . "
\n"); - $this->dumpDiv2($tree["LB"], $depth + 1); - print(str_repeat("\t", $depth) . "
\n"); - print(str_repeat("\t", $depth) . "
\n"); - if (isset($tree["PNUM"])) - printf(str_repeat("\t", $depth + 1) . " %s[%s]\n", $tree["VALUE"], $tree["PNUM"]); - else - printf(str_repeat("\t", $depth + 1) . " %s\n", $tree["VALUE"]); - print(str_repeat("\t", $depth) . "
\n"); - print(str_repeat("\t", $depth) . "
\n"); - $this->dumpDiv2($tree["RB"], $depth + 1); - print(str_repeat("\t", $depth) . "
\n"); - - break; - } - } - - public function dump($tree) - { - switch ($tree["CLASS"]) { - case "SIMPLE": - if (is_array($tree["VALUE"])) - $s = implode("
, ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print("" . $s . ""); - break; - case "QSIMPLE": - if (is_array($tree["VALUE"])) - $s = implode(" , ", $tree["VALUE"]); - else - $s = $tree["VALUE"]; - print(""" . $s . """); - break; - case "PHRASEA_KW_ALL": - printf("%s", $tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) - printf("%s %s", $tree["VALUE"][0], $tree["PNUM"]); - else - printf("%s", $tree["VALUE"][0]); - break; - // case PHRASEA_KW_FIRST: - // if($tree["PNUM"]!==null) - // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); - // else - // printf("%s", $tree["VALUE"]); - // break; - case "OPS": - case "OPK": - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print(""); - print("
"); - if (isset($tree["PNUM"])) - printf(" %s[%s] ", $tree["VALUE"], $tree["PNUM"]); - else - printf(" %s ", $tree["VALUE"]); - print("
"); - print($this->dump($tree["LB"])); - print(""); - print($this->dump($tree["RB"])); - print("
"); - break; - } - } - - public function priority_opk(&$tree, $depth = 0) - { - if (!$tree) { - return; - } - - if ($tree["CLASS"] == "OPK" && ($tree["LB"]["CLASS"] == "OPS" || $tree["LB"]["CLASS"] == "OPK")) { - // on a un truc du genre ((a ou b) < 5), on le transforme en (a ou (b < 5)) - $t = $tree["LB"]; - $tree["LB"] = $t["RB"]; - $t["RB"] = $tree; - $tree = $t; - } - if (isset($tree["LB"])) { - $this->priority_opk($tree["LB"], $depth + 1); - }if (isset($tree["RB"])) { - $this->priority_opk($tree["RB"], $depth + 1); - } - } - - public function distrib_opk(&$tree, $depth = 0) - { - if (!$tree) { - return; - } - - if ($tree["CLASS"] == "OPK" && ($tree["RB"]["CLASS"] == "OPS")) { - // on a un truc du genre (a = (5 ou 6)), on le transforme en ((a = 5) ou (a = 6)) - $tmp = array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"], - "PNUM" => $tree["PNUM"], - "LB" => $tree["LB"], - "RB" => $tree["RB"]["RB"], - "DEPTH" => $tree["LB"]["DEPTH"]); - $t = $tree["RB"]; - $tree["RB"] = $t["LB"]; - $t["LB"] = $tree; - $t["RB"] = $tmp; - $tree = $t; - } - if (isset($tree["LB"])) - $this->distrib_opk($tree["LB"], $depth + 1); - if (isset($tree["RB"])) - $this->distrib_opk($tree["RB"], $depth + 1); - } - - public function thesaurus2_apply(&$tree, $bid) - { - if (!$tree) { - return; - } - - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE") && isset($tree["SREF"]) && isset($tree["SREF"]["TIDS"])) { - $tids = array(); - foreach ($tree["SREF"]["TIDS"] as $tid) { - if ($tid["bid"] == $bid) - $tids[] = $tid["pid"]; - } - if (count($tids) >= 1) { - /* - if (count($tids)==1) { - // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finit par '%') - $val = str_replace(".", "d", $tids[0]) . "d%"; - $tree["VALUE"] = array($val); - } else { - // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle méme la syntaxe car la value finit par '$' - $val = ""; - foreach($tids as $tid) - $val .= ($val?"|":"") . "(" . str_replace(".", "d", $tid) . "d.*)"; - $tree["VALUE"] = array("^" . $val); - } - */ - $tree["VALUE"] = array(); - foreach ($tids as $tid) - $tree["VALUE"][] = str_replace(".", "d", $tid) . "d%";; - } else { - // le mot n'est pas dans le thesaurus - } - /* - */ - } - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $this->thesaurus2_apply($tree["LB"], $bid); - $this->thesaurus2_apply($tree["RB"], $bid); - } - } - - // étend (ou remplace) la recherche sur les termes simples en recherche sur thesaurus - // ex: (a et b) - // full-text only : ==> (a et b) - // thesaurus only : ==> ((th:a) et (th:b)) - // ft et thesaurus : ==> ((a ou (th:a)) et (b ou (th:b))) - // RETOURNE l'arbre résultat sans modifier l'arbre d'origine - public function extendThesaurusOnTerms(&$tree, $useFullText, $useThesaurus, $keepfuzzy) - { - $copy = $tree; - $this->_extendThesaurusOnTerms($tree, $copy, $useFullText, $useThesaurus, $keepfuzzy, 0, ""); - - $this->proposals["QRY"] = "" . $this->_queryAsHTML($tree) . ""; - - return($copy); - } - - public function _extendThesaurusOnTerms(&$tree, &$copy, $useFullText, $useThesaurus, $keepfuzzy, $depth, $path) - { - if ($depth == 0) - $ret = $tree; - if (!$useThesaurus) { - return; // full-text only : inchangé - } - - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE")) { - if (isset($tree["CONTEXT"])) - $copy = $this->_extendToThesaurus_Simple($tree, false, $keepfuzzy, $path); - else - $copy = $this->_extendToThesaurus_Simple($tree, $useFullText, $keepfuzzy, $path); - } else { - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON) { - // on a 'field:value' , on traite 'value' - $tree["RB"]["PATH"] = $copy["RB"]["PATH"] = $path . "R"; - if (isset($tree["RB"]["CONTEXT"])) - $copy["CONTEXT"] = $tree["CONTEXT"] = $tree["RB"]["CONTEXT"]; - else - if (!$keepfuzzy) - $copy["CONTEXT"] = $tree["CONTEXT"] = "*"; - - $copy["RB"]["SREF"] = &$tree["RB"]; - } else { - $recursL = $recursR = false; - if ($tree["CLASS"] == "OPS" && ($tree["NODETYPE"] == PHRASEA_OP_AND || $tree["NODETYPE"] == PHRASEA_OP_OR || $tree["NODETYPE"] == PHRASEA_OP_EXCEPT)) { - // on a une branche à gauche de 'ET', 'OU', 'SAUF' - $recursL = true; - } - if ($tree["CLASS"] == "OPS" && ($tree["NODETYPE"] == PHRASEA_OP_AND || $tree["NODETYPE"] == PHRASEA_OP_OR || $tree["NODETYPE"] == PHRASEA_OP_EXCEPT)) { - // on a une branche à droite de 'ET', 'OU', 'SAUF' - $recursR = true; - } - if ($recursL) - $this->_extendThesaurusOnTerms($tree["LB"], $copy["LB"], $useFullText, $useThesaurus, $keepfuzzy, $depth + 1, $path . "L"); - if ($recursR) - $this->_extendThesaurusOnTerms($tree["RB"], $copy["RB"], $useFullText, $useThesaurus, $keepfuzzy, $depth + 1, $path . "R"); - } - } - } - - // étend (ou remplace) un terme cherché en 'full-text' à une recherche thesaurus (champ non spécifié, tout le thésaurus = '*') - // le contexte éventuel est rapporté à l'opérateur ':' - // ex : a[k] ==> (a ou (TH :[k] a)) - public function _extendToThesaurus_Simple(&$simple, $keepFullText, $keepfuzzy, $path) - { - $simple["PATH"] = $path; - $context = null; - if (isset($simple["CONTEXT"])) { - $context = $simple["CONTEXT"]; - // unset($simple["CONTEXT"]); - } - if ($keepFullText) { - // on fait un OU entre la recherche ft et une recherche th - $tmp = array("CLASS" => "OPS", - "NODETYPE" => PHRASEA_OP_OR, - "VALUE" => "OR", - "PNUM" => null, - "DEPTH" => $simple["DEPTH"], - "LB" => $simple, - "RB" => array("CLASS" => "OPK", - "NODETYPE" => PHRASEA_OP_COLON, - "VALUE" => ":", - // "CONTEXT"=>$context, - "PNUM" => null, - "DEPTH" => $simple["DEPTH"] + 1, - "LB" => array("CLASS" => "SIMPLE", - "NODETYPE" => PHRASEA_KEYLIST, - "VALUE" => array("*"), - "DEPTH" => $simple["DEPTH"] + 2 - ), - "RB" => $simple - ) - ); - // on vire le contexte du coté fulltext - unset($tmp["LB"]["CONTEXT"]); - // ajoute le contexte si nécéssaire - if ($context !== null) - $tmp["RB"]["CONTEXT"] = $context; - else - if (!$keepfuzzy) - $tmp["RB"]["CONTEXT"] = "*"; - // corrige les profondeurs des 2 copies du 'simple' d'origine - $tmp["LB"]["DEPTH"] += 1; - $tmp["RB"]["RB"]["DEPTH"] += 2; - // note une référence vers le terme d'origine - $tmp["RB"]["RB"]["SREF"] = &$simple; - $tmp["RB"]["RB"]["PATH"] = $path; - } else { - // on remplace le ft par du th - $tmp = array("CLASS" => "OPK", - "NODETYPE" => PHRASEA_OP_COLON, - "VALUE" => ":", - // "CONTEXT"=>$context, - "PNUM" => null, - "DEPTH" => $simple["DEPTH"] + 1, - "LB" => array("CLASS" => "SIMPLE", - "NODETYPE" => PHRASEA_KEYLIST, - "VALUE" => array("*"), - "DEPTH" => $simple["DEPTH"] + 1 - ), - "RB" => $simple - ); - // ajoute le contexte si nécéssaire - if ($context !== null) - $tmp["CONTEXT"] = $context; - else - if (!$keepfuzzy) - $tmp["CONTEXT"] = "*"; - // corrige la profondeur de la copie du 'simple' d'origine - $tmp["RB"]["DEPTH"] += 1; - // note une référence vers le terme d'origine - $tmp["RB"]["SREF"] = &$simple; - $tmp["RB"]["PATH"] = $path; - } - - return($tmp); - } - - public function thesaurus2(&$tree, $bid, $name, &$domthe, $searchsynonyms = true, $depth = 0) - { - if ($this->debug) - print("thesaurus2:\n\$tree=" . var_export($tree, true) . "\n"); - - if ($depth == 0) - $this->proposals["BASES"]["b$bid"] = array("BID" => $bid, "NAME" => $name, "TERMS" => array()); - - if (!$tree) { - return(0); - } - - $ambigus = 0; - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON) { -// $ambigus = $this->setTids($tree, $tree["RB"], $bid, $domthe, $searchsynonyms); - $ambigus = $this->setTids($tree, $bid, $domthe, $searchsynonyms); - } elseif ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $ambigus += $this->thesaurus2($tree["LB"], $bid, $name, $domthe, $searchsynonyms, $depth + 1); - $ambigus += $this->thesaurus2($tree["RB"], $bid, $name, $domthe, $searchsynonyms, $depth + 1); - } - - return($ambigus); - } - - public function propAsHTML(&$node, &$html, $path, $depth = 0) - { - global $parm; - if ($depth > 0) { - $tsy = array(); - $lngfound = "?"; - for ($n = $node->firstChild; $n; $n = $n->nextSibling) { - if ($n->nodeName == "sy") { - $lng = $n->getAttribute("lng"); - if (!array_key_exists($lng, $tsy)) - $tsy[$lng] = array(); - $zsy = array("v" => $n->getAttribute("v"), "w" => $n->getAttribute("w"), "k" => $n->getAttribute("k")); - - if ($lngfound == "?" || ($lng == $this->lng && $lngfound != $lng)) { - $lngfound = $lng; - $syfound = $zsy; - } else { - - } - $tsy[$lng][] = $zsy; - } - } - $alt = ""; - foreach ($tsy as $lng => $tsy2) { - foreach ($tsy2 as $sy) { - $alt .= $alt ? "\n" : ""; - $alt .= "" . $lng . ": " . p4string::MakeString($sy["v"], "js"); - } - } - - $this->proposals['QUERIES'][$syfound["w"]] = $syfound["w"]; - - $thtml = $syfound["v"]; - $kjs = $syfound["k"] ? ("'" . p4string::MakeString($syfound["k"], "js") . "'") : "null"; - $wjs = "'" . p4string::MakeString($syfound["w"], "js") . "'"; - - if ($node->getAttribute("term")) { - $thtml = "" . $thtml . ""; - $node->removeAttribute("term"); - } - - $tab = str_repeat("\t", $depth); - $html .= $tab . "
\n"; - $html .= $tab . "\t" . $thtml . "\n"; - } - - $tsort = array(); - for ($n = $node->firstChild; $n; $n = $n->nextSibling) { - if ($n->nodeType == XML_ELEMENT_NODE && $n->getAttribute("marked")) { // only 'te' marked - $lngfound = '?'; - $syfound = '?'; - for ($n2 = $n->firstChild; $n2; $n2 = $n2->nextSibling) { - if ($n2->nodeName == 'sy') { - $lng = $n2->getAttribute('lng'); - if ($lngfound == "?" || ($lng == $this->lng && $lngfound != $lng)) { - $lngfound = $lng; - $syfound = $n2->getAttribute('w'); - } - } - } - $n->removeAttribute("marked"); - for ($i = 0; array_key_exists($syfound . $i, $tsort) && $i < 9999; $i++) - ; - $tsort[$syfound . $i] = $n; - } - } - ksort($tsort); - - foreach ($tsort as $n) { - $this->propAsHTML($n, $html, $path, $depth + 1); - } - - if ($depth > 0) - $html .= $tab . "
\n"; - } - - public function _queryAsHTML($tree, $depth = 0) - { - if ($depth == 0) { - $ambiguites = array("n" => 0, "refs" => array()); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - $w = is_array($tree["VALUE"]) ? implode(' ', $tree["VALUE"]) : $tree["VALUE"]; - if (isset($tree["PATH"])) { - $path = $tree["PATH"]; - if (isset($tree["CONTEXT"])) - $w .= ' [' . $tree["CONTEXT"] . ']'; - $txt = '"' . $w . '"'; - } else { - if (isset($tree["CONTEXT"])) - $w .= '[' . $tree["CONTEXT"] . ']'; - if ($tree["CLASS"] == "QSIMPLE") - $txt = '"' . $w . '"'; - else - $txt = $w; - } - - return($txt); - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return('(' . $this->_queryAsHTML($tree["LB"], $depth + 1) . ' ' . $tree["VALUE"] . '[' . $tree["PNUM"] . '] ' . $this->_queryAsHTML($tree["RB"], $depth + 1) . ')'); - } else { - return('(' . $this->_queryAsHTML($tree["LB"], $depth + 1) . ' ' . $tree["VALUE"] . ' ' . $this->_queryAsHTML($tree["RB"], $depth + 1) . ')'); - } - break; - } - } - - public function setTids(&$tree, $bid, &$domthe, $searchsynonyms) - { - if ($this->debug) - print("============================ setTids:\n\$tree=" . var_export($tree, true) . "\n"); - - // $this->proposals["BASES"]["b$bid"] = array("BID"=>$bid, "TERMS"=>array()); - - $ambigus = 0; - if (is_array($w = $tree["RB"]["VALUE"])) - $t = $w = implode(" ", $w); - - if (isset($tree["CONTEXT"])) { - if (!$tree["CONTEXT"]) { - $x0 = "@w=\"" . $w . "\" and not(@k)"; - } else { - if ($tree["CONTEXT"] == "*") { - $x0 = "@w=\"" . $w . "\""; - } else { - $x0 = "@w=\"" . $w . "\" and @k=\"" . $tree["CONTEXT"] . "\""; - $t .= " (" . $tree["CONTEXT"] . ")"; - } - } - } else { - $x0 = "@w=\"" . $w . "\""; - } - - $x = "/thesaurus//sy[" . $x0 . "]"; - - if ($this->debug) - printf("searching thesaurus with xpath='%s'
\n", $x); - - $dxp = new DOMXPath($domthe); - $nodes = $dxp->query($x); - - if (!isset($tree["RB"]["SREF"]["TIDS"])) - $tree["RB"]["SREF"]["TIDS"] = array(); - if ($nodes->length >= 1) { - if ($nodes->length == 1) { - // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finira par '%') - $this->addtoTIDS($tree["RB"], $bid, $nodes->item(0)); - // $this->thesaurusDOMNodes[] = $nodes->item(0); - } else { - // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle meme la syntaxe car la value finira par '$') - $val = ""; - foreach ($nodes as $node) { - if (!isset($tree["CONTEXT"])) - $ambigus++; - $this->addtoTIDS($tree["RB"], $bid, $node); - } - } - $path = $tree["RB"]["SREF"]["PATH"]; - $prophtml = ""; - $this->propAsHTML($domthe->documentElement, $prophtml, $path); - $this->proposals["BASES"]["b$bid"]["TERMS"][$path]["HTML"] = $prophtml; - } else { - // le mot n'est pas dans le thesaurus - $tree = null; - } - - return($ambigus); - } - - public function containsColonOperator(&$tree) - { - if (!$tree) { - return(false); - } - if ($tree["CLASS"] == "OPK" && $tree["NODETYPE"] == PHRASEA_OP_COLON && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE")) { - return(true); - } - $ret = false; - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $ret |= $this->containsColonOperator($tree["LB"]); - $ret |= $this->containsColonOperator($tree["RB"]); - } - - return($ret); - } - - public function addtoTIDS(&$extendednode, $bid, $DOMnode) // ajoute un tid en évitant les doublons - { - $id = $DOMnode->getAttribute("id"); - $pid = $DOMnode->parentNode->getAttribute("id"); - $lng = $DOMnode->getAttribute("lng"); - $w = $DOMnode->getAttribute("w"); - $k = $DOMnode->getAttribute("k"); - $p = $DOMnode->parentNode->getAttribute("v"); // le terme général (pére) du terme recherché : utile pour la levée d'ambiguité - - $path = $extendednode["SREF"]["PATH"]; - if ($this->debug) - printf("found node id='%s', v='%s' w='%s', k='%s', p='%s' for node-path=%s \n", $id, $DOMnode->getAttribute("v"), $w, $k, $p, $path); - - if (!$k) - $k = null; - - $found = false; - foreach ($extendednode["SREF"]["TIDS"] as $ztid) { - if ($ztid["bid"] != $bid) - continue; - if ($ztid["pid"] == $pid) { - $found = true; - } else { -// if($ztid["w"]==$w && $ztid["k"]==$k && $ztid["lng"]==$lng) -// { -// // FATAL : il y a un doublon réel dans le thesaurus de cette base (méme terme, méme contexte) -// // printf("FATAL doublon on base %d (%s[%s])\n", $bid, $w, $k); -// $found = true; -// break; -// } - } - } - if (!$found) - $extendednode["SREF"]["TIDS"][] = array("bid" => $bid, "pid" => $pid, "id" => $id, "w" => $w, "k" => $k, "lng" => $lng, "p" => $p); - - // on liste les propositions de thésaurus pour ce node (dans l'arbre simple) - if (!isset($this->proposals["BASES"]["b$bid"]["TERMS"][$path])) { - // $this->proposals["TERMS"][$path] = array("TERM"=>implode(" ", $extendednode["VALUE"]), "PROPOSALS"=>array()); - $term = implode(" ", $extendednode["VALUE"]); - if (isset($extendednode["CONTEXT"]) && $extendednode["CONTEXT"]) { - $term .= " (" . $extendednode["CONTEXT"] . ")"; - } - $this->proposals["BASES"]["b$bid"]["TERMS"][$path] = array("TERM" => $term); // , "PROPOSALS"=>array() ); //, "PROPOSALS_TREE"=>new DOMDocument("1.0", "UTF-8")); - } -// printf("<%s id='%s'>
\n", $DOMnode->tagName, $DOMnode->getAttribute("id")); -// printf("found node <%s id='%s' w='%s' k='%s'>
\n", $DOMnode->nodeName, $DOMnode->getAttribute('id'), $DOMnode->getAttribute('w'), $DOMnode->getAttribute('k')); - // on marque le terme principal - $DOMnode->parentNode->setAttribute("term", "1"); - // on commence par marquer les fils directs. rappel:$DOMnode pointe sur un sy - for ($node = $DOMnode->parentNode->firstChild; $node; $node = $node->nextSibling) { - if ($node->nodeName == "te") { - $node->setAttribute("marked", "1"); - } - } - // puis par remonter au père - for ($node = $DOMnode->parentNode; $node && $node->nodeType == XML_ELEMENT_NODE && $node->parentNode; $node = $node->parentNode) { - $id = $node->getAttribute("id"); - if (!$id) - break; // on a dépassé la racine du thésaurus - $node->setAttribute("marked", "1"); - } - } - - public function astext_ambigu($tree, &$ambiguites, $mouseCallback = "void", $depth = 0) - { - if ($depth == 0) { - $ambiguites = array("n" => 0, "refs" => array()); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - $prelink = $postlink = ""; - $w = is_array($tree["VALUE"]) ? implode(" ", $tree["VALUE"]) : $tree["VALUE"]; - $tab = "\n" . str_repeat("\t", $depth); - if (isset($tree["TIDS"]) && count($tree["TIDS"]) > 1) { - $ambiguites["refs"][$n = $ambiguites["n"]] = &$tree; - $txt = $tab . ""; - $txt .= $tab . "\t\"" . $w . ""; - $txt .= $tab . "\t\""; - $txt .= $tab . "\n"; - $ambiguites["n"]++; - } else { - if (isset($tree["CONTEXT"])) - $w .= "[" . $tree["CONTEXT"] . "]"; - if ($tree["CLASS"] == "QSIMPLE") - $txt = $tab . "\"" . $w . "\"\n"; - else - $txt = $tab . "" . $w . "\n"; - } - - return($txt); - break; - case "PHRASEA_KW_ALL": - return($tree["VALUE"][0]); - break; - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== null) { - return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); - } else { - return($tree["VALUE"][0]); - } - break; - case "OPS": - case "OPK": - if (isset($tree["PNUM"])) { - return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth + 1) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth + 1) . ")"); - } else { - return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth + 1) . " " . $tree["VALUE"] . " " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth + 1) . ")"); - } - break; - } - } - - public function get_ambigu(&$tree, $mouseCallback = "void", $depth = 0) - { - if (!$tree) { - return(""); - } - - unset($tree["DEPTH"]); - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - $this->get_ambigu($tree["LB"], $mouseCallback, $depth + 1); - $this->get_ambigu($tree["RB"], $mouseCallback, $depth + 1); - } else { - - } - if ($depth == 0) { - $t_ambiguites = array(); - $r = ($this->astext_ambigu($tree, $t_ambiguites, $mouseCallback)); - $t_ambiguites["query"] = $r; - - return($t_ambiguites); - } - } - - public function set_default(&$tree, &$emptyw, $depth = 0) - { - if (!$tree) { - return(true); - } - - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["CLASS"] == "OPS") { - if (!$this->set_default($tree["LB"], $emptyw, $depth + 1)) { - return(false); - } - if (!$this->set_default($tree["RB"], $emptyw, $depth + 1)) { - return(false); - } - } else { // OPK ! - // jy 20041223 : ne pas appliquer d'op. par def. derriere un op arith. - // ex : "d < 1/2/2003" : grouper la liste "1","2","2004" en "mot" unique - if (!$tree["LB"] || ($tree["LB"]["CLASS"] != "SIMPLE" && $tree["LB"]["CLASS"] != "QSIMPLE") || (is_array($tree["LB"]["VALUE"]) && count($tree["LB"]["VALUE"]) != 1)) { - // un op. arith. doit étre précédé d'un seul nom de champ - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, un nom de champs est attendu avant l operateur %s'), $tree["VALUE"]); - - return(false); - } - if (!$tree["RB"] || ($tree["RB"]["CLASS"] != "SIMPLE" && $tree["RB"]["CLASS"] != "QSIMPLE")) { - // un op. arith. doit étre suivi d'une valeur - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendue apres l operateur %s'), $tree["VALUE"]); - - return(false); - } - if (is_array($tree["RB"]["VALUE"])) { - $lw = ""; - foreach ($tree["RB"]["VALUE"] as $w) - $lw .= ( $lw == "" ? "" : " ") . $w; - $tree["RB"]["VALUE"] = $lw; - } - } - - /** gestion des branches null - * a revoir car ca ppete pas d'erreur mais corrige automatiquement - * ** */ - if (!isset($tree["RB"])) - $tree = $tree["LB"]; - else - if (!isset($tree["LB"])) - $tree = $tree["RB"]; - } else { - if (($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE")) { - if (is_array($tree["VALUE"])) { - $treetmp = null; - $pnum = 0; - for ($i = 0; $i < count($tree["VALUE"]); $i++) { - // gestion mot vide - if (isset($emptyw[$tree["VALUE"][$i]]) || $tree["VALUE"][$i] == "?" || $tree["VALUE"][$i] == "*") { - // on a forcé les '?' ou '*' isolés comme des mots vides - $pnum++; - } else { - if ($treetmp == null) { - $treetmp = array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"][$i], - "PNUM" => $tree["PNUM"], - "DEPTH" => $tree["DEPTH"]); - $pnum = 0; - } else { - $dop = $tree["CLASS"] == "QSIMPLE" ? $this->quoted_defaultop : $this->defaultop; - $treetmp = array("CLASS" => "OPS", - "VALUE" => $dop["VALUE"], - "NODETYPE" => $dop["NODETYPE"], - "PNUM" => $pnum, // peut-être écrasé par defaultop - "DEPTH" => $depth, - "LB" => $treetmp, - "RB" => array("CLASS" => $tree["CLASS"], - "NODETYPE" => $tree["NODETYPE"], - "VALUE" => $tree["VALUE"][$i], - "PNUM" => $tree["PNUM"], - "DEPTH" => $tree["DEPTH"]) - ); - if (array_key_exists("PNUM", $dop)) - $treetmp["PNUM"] = $dop["PNUM"]; - $pnum = 0; - } - } - } - $tree = $treetmp; - } - } - } - - return(true); - } - - public function factor_or(&$tree) - { - do - $n = $this->factor_or2($tree); while ($n > 0); - } - - public function factor_or2(&$tree, $depth = 0) - { - $nmodif = 0; - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["NODETYPE"] == PHRASEA_OP_OR && ($tree["LB"]["CLASS"] == "SIMPLE" || $tree["LB"]["CLASS"] == "QSIMPLE") && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE")) { - $tree["CLASS"] = "SIMPLE"; - $tree["NODETYPE"] = PHRASEA_KEYLIST; - $tree["VALUE"] = is_array($tree["LB"]["VALUE"]) ? $tree["LB"]["VALUE"] : array($tree["LB"]["VALUE"]); - if (is_array($tree["RB"]["VALUE"])) { - foreach ($tree["RB"]["VALUE"] as $v) - $tree["VALUE"][] = $v; - } else - $tree["VALUE"][] = $tree["RB"]["VALUE"]; - unset($tree["LB"]); - unset($tree["RB"]); - unset($tree["PNUM"]); - $nmodif++; - } else { - $nmodif += $this->factor_or2($tree["LB"], $depth + 1); - $nmodif += $this->factor_or2($tree["RB"], $depth + 1); - } - } - - return($nmodif); - } - - public function setNumValue(&$tree, SimpleXMLElement $sxml_struct, $depth = 0) - { - if ($tree["CLASS"] == "OPK") { - if (isset($tree["RB"]) && ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") && ($tree["LB"]["CLASS"] == "SIMPLE" || $tree["LB"]["CLASS"] == "QSIMPLE")) { - $z = $sxml_struct->xpath('/record/description'); - if ($z && is_array($z)) { - foreach ($z[0] as $ki => $vi) { - $champ = null; - if (is_array($tree["LB"]["VALUE"])) - $champ = $tree["LB"]["VALUE"][0]; - else - $champ = $tree["LB"]["VALUE"]; - if ($champ && strtoupper($ki) == strtoupper($champ)) { - foreach ($vi->attributes() as $propname => $val) { - if (strtoupper($propname) == strtoupper("type")) { - if ($tree["NODETYPE"] == PHRASEA_OP_EQUAL) // cas particulier du "=" sur une date - $this->changeNodeEquals($tree, $val); - else - $this->setNumValue2($tree["RB"], $val); - } - } - } - } - } - } - } - if (isset($tree["LB"])) - $this->setNumValue($tree["LB"], $sxml_struct, $depth + 1); - if (isset($tree["RB"])) - $this->setNumValue($tree["RB"], $sxml_struct, $depth + 1); - } - - public function changeNodeEquals(&$branch, $type) - { - if (strtoupper($type) == strtoupper("Date")) { - $branch = $this->changeNodeEquals2($branch); - } - } - - public function changeNodeEquals2($oneBranch) - { - ## creation branche gauche avec ">=" -// print("changeNodeEquals2\n"); -// print("creation branche gauche ( '>=' ) \n"); - $newTreeLB = array("CLASS" => "OPK", - "VALUE" => ">=", - "NODETYPE" => PHRASEA_OP_GEQT, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $oneBranch["LB"], - "RB" => array("CLASS" => "SIMPLE", - "VALUE" => $this->isoDate($oneBranch["RB"]["VALUE"], false), - "NODETYPE" => PHRASEA_KEYLIST, - "PNUM" => NULL, - "DEPTH" => 0) - ); - - $newTreeRB = array("CLASS" => "OPK", - "VALUE" => "<=", - "NODETYPE" => PHRASEA_OP_LEQT, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $oneBranch["LB"], - "RB" => array("CLASS" => "SIMPLE", - "VALUE" => $this->isoDate($oneBranch["RB"]["VALUE"], true), - "NODETYPE" => PHRASEA_KEYLIST, - "PNUM" => NULL, - "DEPTH" => 0) - ); -// print("fin creation branche droite avec '<=' \n"); - ## fin creation branche droite ( "<=" ) - - $tree = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => NULL, - "DEPTH" => 0, - "LB" => $newTreeLB, - "RB" => $newTreeRB); - - - return $tree; - } - - public function setNumValue2(&$branch, $type) - { - if (strtoupper($type) == strtoupper("Date")) { - $dateEnIso = $this->isoDate($branch["VALUE"]); - $branch["VALUE"] = $dateEnIso; - } - } - - public function isoDate($onedate, $max = false) - { - $v_y = "1900"; - $v_m = "01"; - $v_d = "01"; - - $v_h = $v_minutes = $v_s = "00"; - if ($max) { - $v_h = $v_minutes = $v_s = "99"; - } - $tmp = $onedate; - - if (!is_array($tmp)) - $tmp = explode(" ", $tmp); - - switch (sizeof($tmp)) { - // on a une date complete séparé avec des espaces, slash ou tiret - case 3 : - if (strlen($tmp[0]) == 4) { - $v_y = $tmp[0]; - $v_m = $tmp[1]; - $v_d = $tmp[2]; - // on a l'année en premier, on suppose alors que c'est de la forme YYYY MM DD - } elseif (strlen($tmp[2]) == 4) { - // on a l'année en dernier, on suppose alors que c'est de la forme DD MM YYYY - $v_y = $tmp[2]; - $v_m = $tmp[1]; - $v_d = $tmp[0]; - } else { - // l'année est sur un 2 chiffre et pas 4 - // ca fou la zone - - $v_d = $tmp[0]; - $v_m = $tmp[1]; - if ($tmp[2] < 20) - $v_y = "20" . $tmp[2]; - else - $v_y = "19" . $tmp[2]; - } - break; - - case 2 : - // On supposerait n'avoir que le mois et l'année - if (strlen($tmp[0]) == 4) { - $v_y = $tmp[0]; - $v_m = $tmp[1]; - // on a l'année en premier, on suppose alors que c'est de la forme YYYY MM DD - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } elseif (strlen($tmp[1]) == 4) { - // on a l'année en premier, on suppose alors que c'est de la forme DD MM YYYY - $v_y = $tmp[1]; - $v_m = $tmp[0]; - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } else { - // on a l'anné sur 2 chiffres - if ($tmp[1] < 20) - $v_y = "20" . $tmp[1]; - else - $v_y = "19" . $tmp[1]; - $v_m = $tmp[0]; - if ($max) - $v_d = "99"; - else - $v_d = "00"; - } - break; - - - // lé ca devient la zone pour savoir si on a que l'année ou si c'est une date sans espaces,slash ou tiret - case 1 : - switch (strlen($tmp[0])) { - case 14 : - // date iso YYYYMMDDHHMMSS - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - $v_d = substr($tmp[0], 6, 2); - $v_h = substr($tmp[0], 8, 2); - $v_minutes = substr($tmp[0], 10, 2); - $v_s = substr($tmp[0], 12, 2); - break; - case 8 : - // date iso YYYYMMDD - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - $v_d = substr($tmp[0], 6, 2); - break; - case 6 : - // date iso YYYYMM - $v_y = substr($tmp[0], 0, 4); - $v_m = substr($tmp[0], 4, 2); - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - case 4 : - // date iso YYYY - $v_y = $tmp[0]; - - if ($max) - $v_m = "99"; - else - $v_m = "00"; - - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - case 2 : - // date iso YY - if ($tmp[0] < 20) - $v_y = "20" . $tmp[0]; - else - $v_y = "19" . $tmp[0]; - - if ($max) - $v_m = "99"; - else - $v_m = "00"; - - if ($max) - $v_d = "99"; - else - $v_d = "00"; - break; - } - - - - break; - } - - return("" . $v_y . $v_m . $v_d . $v_h . $v_minutes . $v_s); - } - - public function distrib_in(&$tree, $depth = 0) - { - $opdistrib = array(PHRASEA_OP_AND, PHRASEA_OP_OR, PHRASEA_OP_EXCEPT, PHRASEA_OP_NEAR, PHRASEA_OP_BEFORE, PHRASEA_OP_AFTER); // ces opérateurs sont 'distribuables' autour d'un 'IN' - - if ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if ($tree["NODETYPE"] == PHRASEA_OP_IN || $tree["CLASS"] == "OPK") { - if ($tree["LB"]["CLASS"] == "OPK") { - // on a un truc du genre '(t1 = t2) dans t3' - // ... on ne fait rien - } - if ($tree["LB"]["CLASS"] == "OPS" && in_array($tree["LB"]["NODETYPE"], $opdistrib)) { - // on a un truc du genre '(t1 op t2) {dans|=} t3', on distribue le dans é t1 et t2 - // ==> ((t1 dans t3) op (t2 dans t3)) - $m_v = $tree["VALUE"]; - $m_t = $tree["CLASS"]; - $m_o = $tree["NODETYPE"]; - $m_n = $tree["PNUM"]; - - $tree["CLASS"] = $tree["LB"]["CLASS"]; - $tree["NODETYPE"] = $tree["LB"]["NODETYPE"]; - $tree["VALUE"] = $tree["LB"]["VALUE"]; - $tree["PNUM"] = $tree["LB"]["PNUM"]; - - $tree["LB"]["CLASS"] = $m_t; - $tree["LB"]["NODETYPE"] = $m_o; - $tree["LB"]["VALUE"] = $m_v; - $tree["LB"]["PNUM"] = $m_n; - - $tree["RB"] = array("CLASS" => $m_t, - "NODETYPE" => $m_o, - "VALUE" => $m_v, - "PNUM" => $m_n, - "LB" => $tree["LB"]["RB"], - "RB" => $tree["RB"]); - - $tree["LB"]["RB"] = $tree["RB"]["RB"]; - // return; - } - - if ($tree["RB"]["CLASS"] == "OPS" && in_array($tree["RB"]["NODETYPE"], $opdistrib)) { - - // on a un truc du genre 't1 {dans|=} (t2 op t3)', on distribue le dans é t2 et t3 - // ==> ((t1 dans t2) ou (t1 dans t3)) - $m_v = $tree["VALUE"]; - $m_t = $tree["CLASS"]; - $m_o = $tree["NODETYPE"]; - $m_n = $tree["PNUM"]; - - $tree["CLASS"] = $tree["RB"]["CLASS"]; - $tree["NODETYPE"] = $tree["RB"]["NODETYPE"]; - $tree["VALUE"] = $tree["RB"]["VALUE"]; - $tree["PNUM"] = $tree["RB"]["PNUM"]; - - $tree["RB"]["CLASS"] = $m_t; - $tree["RB"]["NODETYPE"] = $m_o; - $tree["RB"]["VALUE"] = $m_v; - $tree["RB"]["PNUM"] = $m_n; - - $tree["LB"] = array("CLASS" => $m_t, - "NODETYPE" => $m_o, - "VALUE" => $m_v, - "PNUM" => $m_n, - "LB" => $tree["LB"], - "RB" => $tree["RB"]["LB"]); - - $tree["RB"]["LB"] = $tree["LB"]["LB"]; - } - } - $this->distrib_in($tree["LB"], $depth + 1); - $this->distrib_in($tree["RB"], $depth + 1); - } - } - - public function makequery($tree) - { - $a = array($tree["NODETYPE"]); - switch ($tree["CLASS"]) { - case "PHRASEA_KW_LAST": - if ($tree["PNUM"] !== NULL) - $a[] = $tree["PNUM"]; - break; - case "PHRASEA_KW_ALL": - break; - case "SIMPLE": - case "QSIMPLE": - // pas de tid, c'est un terme normal - if (is_array($tree["VALUE"])) { - foreach ($tree["VALUE"] as $k => $v) - $a[] = $v; - } else { - $a[] = $tree["VALUE"]; - } - break; - case "OPK": - if ($tree["LB"] !== NULL) - $a[] = $this->makequery($tree["LB"]); - if ($tree["RB"] !== NULL) - $a[] = $this->makequery($tree["RB"]); - break; - case "OPS": - if ($tree["PNUM"] !== NULL) - $a[] = intval($tree["PNUM"]); - if ($tree["LB"] !== NULL) - $a[] = $this->makequery($tree["LB"]); - if ($tree["RB"] !== NULL) - $a[] = $this->makequery($tree["RB"]); - break; - } - - return($a); - } - - public function maketree($depth, $inquote = false) - { -// printf("\n\n"); - $tree = null; - while ($t = $this->nexttoken($inquote)) { - if ($this->debug) - printf("got token %s of class %s\n", $t["VALUE"], $t["CLASS"]); - switch ($t["CLASS"]) { - case "TOK_RP": - if ($inquote) { - // quand on est entre guillements les tokens perdent leur signification - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if (!$tree) { - return(null); - } - } else { - if ($depth <= 0) { // ')' : retour de récursivité - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= _('qparser:: erreur : trop de parentheses fermantes'); - - return(null); - } - - return($tree); - } - break; - case "TOK_LP": - if ($inquote) { - // quand on est entre guillements les tokens perdent leur signification - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if (!$tree) { - return(null); - } - } else { // '(' : appel récursif - if (!$tree) - $tree = $this->maketree($depth + 1); - else { - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - $tree["RB"] = $this->maketree($depth + 1); - if (!$tree["RB"]) - $tree = null; - } else { - // ici on applique l'opérateur par défaut - $tree = array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => $this->maketree($depth + 1)); - } - } - if (!$tree) { - return(null); - } - } - break; - case "TOK_VOID": - // ce token est entre guillemets : on le saute - break; - case "TOK_QUOTE": - // une expr entre guillemets est 'comme entre parenthéses', - // sinon "a b" OU "x y" -> (((a B0 b) OU x) B0 y) au lieu de - // "a b" OU "x y" -> ((a B0 b) OU (x B0 y)) - if ($inquote) { - if ($this->debug) { - print("CLOSING QUOTE!\n"); - } - // fermeture des guillemets -> retour de récursivité - if ($depth <= 0) { // ')' : retour de récursivité - print("\nguillemets fermants en trop
"); - - return(null); - } - - return($tree); - } else { - if ($this->debug) { - print("OPENING QUOTE!
"); - } - // ouverture des guillemets -> récursivité - if (!$tree) - $tree = $this->maketree($depth + 1, true); - else { - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - $tree["RB"] = $this->maketree($depth + 1, true); - if (!$tree["RB"]) - $tree = null; - } else { - // ici on applique l'opérateur par défaut - $tree = array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => $this->maketree($depth + 1, true)); - } - } - if (!$tree) { - return(null); - } - } - break; - default: - $tree = $this->addtotree($tree, $t, $depth, $inquote); - if ($this->debug) { - print("---- après addtotree ----\n"); - var_dump($tree); - print("-------------------------\n"); - } - if (!$tree) { - return(null); - } - break; - } - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendu apres %s'), $tree["VALUE"]); - $tree = $tree["LB"]; - } - - return($tree); - } - - public function addtotree($tree, $t, $depth, $inquote) - { - if ($this->debug) { - printf("addtotree({tree}, \$t[CLASS]='%s', \$t[VALUE]='%s', \$depth=%d, inquote=%s)\n", $t["CLASS"], $t["VALUE"], $depth, $inquote ? "true" : "false"); - print("---- avant addtotree ----\n"); - var_dump($tree); - print("-------------------------\n"); - } - - if (!$t) { - return($tree); - } - - switch ($t["CLASS"]) { - case "TOK_CONTEXT": -// if($this->debug) -// { -// printf("addtotree({tree}, \$t='%s', \$depth=%d, inquote=%s)\n", $t["VALUE"], $depth, $inquote?"true":"false"); -// } - if ($tree["CLASS"] == "SIMPLE" || $tree["CLASS"] == "QSIMPLE") { - // un [xxx] suit un terme : il introduit un contexte - $tree["CONTEXT"] = $t["VALUE"]; - } elseif ($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") { - if (!isset($tree["RB"]) || !$tree["RB"]) { - // un [xxx] peut suivre un opérateur, c'est un paramétre normalement numérique - $tree["PNUM"] = $t["VALUE"]; - } else { - // [xxx] suit un terme déjé en branche droite ? (ex: a ou b[k]) - if ($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") - $tree["RB"]["CONTEXT"] = $t["VALUE"]; - else { - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"]); - - return(null); - } - } - } else { - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"]); - - return(null); - } - - return($tree); - break; - case "TOK_CMP": - // < > <= >= <> = : sont des opérateurs de comparaison - if (!$tree) { - // printf("\nUne question ne peut commencer par '" . $t["VALUE"] . "'
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'), $t["VALUE"]); - - return(null); - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - // printf("'" . $t["VALUE"] . "' ne peut suivre un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, ne peut suivre un operateur : %s'), $t["VALUE"]); - - return(null); - } - - return(array("CLASS" => "OPK", "VALUE" => $t["VALUE"], "NODETYPE" => $this->opk[$t["VALUE"]]["NODETYPE"], "PNUM" => null, "DEPTH" => $depth, "LB" => $tree, "RB" => null)); - break; - case "TOK_WORD": - if ($t["CLASS"] == "TOK_WORD" && isset($this->ops[$t["VALUE"]]) && !$inquote) { - // ce mot est un opérateur phrasea - if (!$tree) { - // printf("\n581 : Une question ne peut commencer par un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'), $t["VALUE"]); - - return(null); - } - if (($tree["CLASS"] == "OPS" || $tree["CLASS"] == "OPK") && $tree["RB"] == null) { - - // printf("\n586 : Un opérateur ne peut suivre un opérateur
"); - if ($this->errmsg != "") - $this->errmsg .= "\\n"; - $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, %s ne peut suivre un operateur'), $t["VALUE"]); - - return(null); - } - $pnum = null; - if ($this->ops[$t["VALUE"]]["CANNUM"]) { - // cet opérateur peut étre suivi d'un nombre ('near', 'before', 'after') - if ($tn = $this->nexttoken()) { - if ($tn["CLASS"] == "TOK_WORD" && is_numeric($tn["VALUE"])) - $pnum = (int) $tn["VALUE"]; - else - $this->ungettoken($tn["VALUE"]); - } - } - - return(array("CLASS" => "OPS", "VALUE" => $t["VALUE"], "NODETYPE" => $this->ops[$t["VALUE"]]["NODETYPE"], "PNUM" => $pnum, "DEPTH" => $depth, "LB" => $tree, "RB" => null)); - } else { - // ce mot n'est pas un opérateur - $pnum = null; - $nodetype = PHRASEA_KEYLIST; - if ($t["CLASS"] == "TOK_WORD" && isset($this->spw[$t["VALUE"]]) && !$inquote) { - // mais c'est un mot 'spécial' de phrasea ('last', 'all') - $type = $this->spw[$t["VALUE"]]["CLASS"]; - $nodetype = $this->spw[$t["VALUE"]]["NODETYPE"]; - if ($this->spw[$t["VALUE"]]["CANNUM"]) { - // 'last' peut étre suivi d'un nombre - if ($tn = $this->nexttoken()) { - if ($tn["CLASS"] == "TOK_WORD" && is_numeric($tn["VALUE"])) - $pnum = (int) $tn["VALUE"]; - else - $this->ungettoken($tn["VALUE"]); - } - } - } else { - //printf("sdfsdfsdfsd
"); - $type = $inquote ? "QSIMPLE" : "SIMPLE"; - } - - return($this->addsimple($t, $type, $nodetype, $pnum, $tree, $depth)); - } - break; - } - } - - public function addsimple($t, $type, $nodetype, $pnum, $tree, $depth) - { - $nok = 0; - $w = $t["VALUE"]; - if ($w != "?" && $w != "*") { // on laisse passer les 'isolés' pour les traiter plus tard comme des mots vides - for ($i = 0; $i < strlen($w); $i++) { - $c = substr($w, $i, 1); - if ($c == "?" || $c == "*") { - if ($nok < $this->app['phraseanet.registry']->get('GV_min_letters_truncation')) { - if ($this->errmsg != "") - $this->errmsg .= sprintf("\\n"); - $this->errmsg .= _('qparser:: Formulation incorrecte, necessite plus de caractere : ') . "
" . $this->app['phraseanet.registry']->get('GV_min_letters_truncation'); - - return(null); - } - // $nok = 0; - } else - $nok++; - } - } - if (!$tree) { - return(array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth)); - } - switch ($tree["CLASS"]) { - case "SIMPLE": - case "QSIMPLE": - if ($type == "SIMPLE" || $type == "QSIMPLE") - $tree["VALUE"][] = $t["VALUE"]; - else { - $tree = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth)); - } - - return($tree); - case "OPS": - case "OPK": - if ($tree["RB"] == null) { - $tree["RB"] = array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth); - - return($tree); - } else { - if (($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") && $tree["RB"]["DEPTH"] == $depth) { - $tree["RB"]["VALUE"][] = $t["VALUE"]; - - return($tree); - } - if (($tree["RB"]["CLASS"] == "PHRASEA_KW_LAST" || $tree["RB"]["CLASS"] == "PHRASEA_KW_ALL") && $tree["RB"]["DEPTH"] == $depth) { - $tree["RB"] = array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree["RB"], - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth)); - - return($tree); - } - - return(array("CLASS" => "OPS", - "VALUE" => $this->defaultop["VALUE"], - "NODETYPE" => $this->defaultop["NODETYPE"], - "PNUM" => $this->defaultop["PNUM"], - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, "NODETYPE" => $nodetype, "VALUE" => array($t["VALUE"]), "PNUM" => $pnum, "DEPTH" => $depth) - )); - } - case "PHRASEA_KW_LAST": - case "PHRASEA_KW_ALL": - return(array("CLASS" => "OPS", - "VALUE" => "et", - "NODETYPE" => PHRASEA_OP_AND, - "PNUM" => null, - "DEPTH" => $depth, - "LB" => $tree, - "RB" => array("CLASS" => $type, - "NODETYPE" => $nodetype, - "VALUE" => array($t["VALUE"]), - "PNUM" => $pnum, - "DEPTH" => $depth))); - } - } - - public function ungettoken($s) - { - $this->phq = $s . " " . $this->phq; - } - - public function nexttoken($inquote = false) - { - if ($this->phq == "") { - return(null); - } - - switch ($c = substr($this->phq, 0, 1)) { - case "<": - case ">": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $c2 = $c . substr($this->phq, 1, 1); - if ($c2 == "<=" || $c2 == ">=" || $c2 == "<>") { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 2, 99999, 'UTF-8'), 'UTF-8'); - $c = $c2; - } else { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - } - - return(array("CLASS" => "TOK_CMP", "VALUE" => $c)); - break; - case "=": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_CMP", "VALUE" => "=")); - break; - case ":": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_CMP", "VALUE" => ":")); - break; - case "(": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_LP", "VALUE" => "(")); - break; - case ")": - if ($inquote) { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_VOID", "VALUE" => $c)); - } - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_RP", "VALUE" => ")")); - break; - case "[": - // if($inquote) - // { - // $this->phq = ltrim(substr($this->phq, 1)); - // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); - // } - // un '[' introduit un contexte qu'on lit jusqu'au ']' - $closeb = mb_strpos($this->phq, "]", 1, 'UTF-8'); - if ($closeb !== false) { - $context = $this->mb_trim(mb_substr($this->phq, 1, $closeb - 1, 'UTF-8'), 'UTF-8'); - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $closeb + 1, 99999, 'UTF-8'), 'UTF-8'); - } else { - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - $this->phq = ""; - } - $context = $this->unicode->remove_indexer_chars($context); - - return(array("CLASS" => "TOK_CONTEXT", "VALUE" => $context)); - break; - /* - case "]": - // if($inquote) - // { - // $this->phq = ltrim(substr($this->phq, 1)); - // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); - // } - $this->phq = ltrim(substr($this->phq, 1)); - - return(array("CLASS"=>"TOK_RB", "VALUE"=>"]")); - break; - */ - case "\"": - $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); - - return(array("CLASS" => "TOK_QUOTE", "VALUE" => "\"")); - break; - default: - $l = mb_strlen($this->phq, 'UTF-8'); - $t = ""; - $c_utf8 = ""; - for ($i = 0; $i < $l; $i++) { - if (!$this->unicode->has_indexer_bad_char(($c_utf8 = mb_substr($this->phq, $i, 1, 'UTF-8')))) { - // $c = mb_strtolower($c); - // $t .= isset($this->noaccent[$c]) ? $this->noaccent[$c] : $c; - $t .= $this->unicode->remove_diacritics(mb_strtolower($c_utf8)); - } else - break; - } -// if ($c_utf8 == "(" || $c_utf8 == ")" || $c_utf8 == "[" || $c_utf8 == "]" || $c_utf8 == "=" || $c_utf8 == ":" || $c_utf8 == "<" || $c_utf8 == ">" || $c_utf8 == "\"") - if (in_array($c_utf8, array("(", ")", "[", "]", "=", ":", "<", ">", "\""))) { - // ces caractéres sont des délimiteurs avec un sens, il faut les garder - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i, 99999, 'UTF-8'), 'UTF-8'); - } else { - // le délimiteur était une simple ponctuation, on le saute - $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i + 1, 99999, 'UTF-8'), 'UTF-8'); - } - if ($t != "") { - return(array("CLASS" => "TOK_WORD", "VALUE" => $t)); - } else { - return(array("CLASS" => "TOK_VOID", "VALUE" => $t)); - } - break; - } - } -} - diff --git a/lib/classes/searchEngine/adapter/sphinx/engine.class.php b/lib/classes/searchEngine/adapter/sphinx/engine.class.php deleted file mode 100644 index 26e7f26c4a..0000000000 --- a/lib/classes/searchEngine/adapter/sphinx/engine.class.php +++ /dev/null @@ -1,703 +0,0 @@ -app = $app; - - $this->sphinx = new SphinxClient (); - $this->sphinx->SetArrayResult(true); - - $this->sphinx->SetServer($this->app['phraseanet.registry']->get('GV_sphinx_host'), (int) $this->app['phraseanet.registry']->get('GV_sphinx_port')); - $this->sphinx->SetConnectTimeout(1); - - return $this; - } - - /** - * - * @param searchEngine_options $options - * @return searchEngine_adapter_sphinx_engine - */ - public function set_options(searchEngine_options $options) - { - $this->options = $options; - - $filters = array(); - - $sbas_ids = array(); - - $this->use_stemming = $options->get_use_stemming(); - $this->locale = $options->get_locale(); - - foreach ($options->get_bases() as $bas) { - $this->distinct_sbas[phrasea::sbasFromBas($this->app, $bas)] = true; - $key = phrasea::sbasFromBas($this->app, $bas) . '_' . phrasea::collFromBas($this->app, $bas); - $sbas_id = phrasea::sbasFromBas($this->app, $bas); - $sbas_ids[$sbas_id] = $sbas_id; - $filters[] = crc32($key); - } - - if ($filters) { - $this->sphinx->SetFilter('crc_sbas_coll', $filters); - } - - $this->sphinx->SetFilter('deleted', array(0)); - - $filters = array(); - - foreach ($sbas_ids as $sbas_id) { - $fields = $this->app['phraseanet.appbox']->get_databox($sbas_id)->get_meta_structure(); - - foreach ($fields as $field) { - if ( ! in_array($field->get_id(), $options->get_fields())) - continue; - - $key = $sbas_id . '_' . $field->get_id(); - $filters[] = crc32($key); - $this->search_in_field = true; - } - } - - $crc_coll_business = array(); - - if ($options->get_business_fields()) { - $this->search_in_field = true; - - foreach ($options->get_business_fields() as $base_id) { - $crc_coll_business[] = crc32(phrasea::collFromBas($this->app, $base_id) . '_1'); - $crc_coll_business[] = crc32(phrasea::collFromBas($this->app, $base_id) . '_0'); - } - - $non_business = array_diff($options->get_bases(), $options->get_business_fields()); - - foreach ($non_business as $base_id) { - $crc_coll_business[] = crc32(phrasea::collFromBas($this->app, $base_id) . '_0'); - } - - $this->sphinx->SetFilter('crc_coll_business', $crc_coll_business); - } elseif ($this->search_in_field) { - $this->sphinx->SetFilter('business', array(0)); - } - - if ($filters) { - $this->sphinx->SetFilter('crc_struct_id', $filters); - } - - /** - * @todo : enhance : check status better - */ - foreach ($sbas_ids as $sbas_id) { - $s_status = $this->app['phraseanet.appbox']->get_databox($sbas_id)->get_statusbits(); - $status_opts = $options->get_status(); - foreach ($s_status as $n => $status) { - if ( ! array_key_exists($n, $status_opts)) - continue; - if ( ! array_key_exists($sbas_id, $status_opts[$n])) - continue; - $crc = crc32($sbas_id . '_' . $n); - $this->sphinx->SetFilter('status', array($crc), ($status_opts[$n][$sbas_id] == '0')); - } - } - - $this->sphinx->SetFilter('parent_record_id', array($options->get_search_type())); - - $filters = array(); - - if ($options->get_record_type() != '') { - $filters[] = crc32($options->get_record_type()); - } - - if ($filters) { - $this->sphinx->SetFilter('crc_type', $filters); - } - - $ord = ''; - switch ($options->get_sortord()) { - case searchEngine_options::SORT_MODE_ASC: - $ord = 'ASC'; - break; - case searchEngine_options::SORT_MODE_DESC: - default: - $ord = 'DESC'; - break; - } - - switch ($options->get_sortby()) { - case searchEngine_options::SORT_RANDOM: - $sort = '@random'; - break; - case searchEngine_options::SORT_RELEVANCE: - default: - $sort = '@relevance ' . $ord . ', created_on ' . $ord; - break; - case searchEngine_options::SORT_CREATED_ON: - $sort = 'created_on ' . $ord; - break; - } - - $this->sphinx->SetGroupBy('crc_sbas_record', SPH_GROUPBY_ATTR, $sort); - - return $this; - } - - /** - * - * @return array - */ - public function get_status() - { - $status = $this->sphinx->Status(); - if (false === $status) - throw new Exception(_('Sphinx server is offline')); - - return $status; - } - - /** - * - * @return searchEngine_adapter_sphinx_engine - */ - protected function parse_query() - { - $this->query = trim($this->query); - - while (substr($this->query, 0, 1) === '(' && substr($this->query, -1) === ')') - $this->query = substr($this->query, 1, (mb_strlen($this->query) - 2)); - - if ($this->query == 'all') - $this->query = ''; - - while (mb_strpos($this->query, ' ') !== false) { - $this->query = str_replace(' ', ' ', $this->query); - } - - $preg = preg_match('/\s?recordid\s?=\s?([0-9]+)/i', $this->query, $matches, 0, 0); - - if ($preg > 0) { - $this->sphinx->SetFilter('record_id', array($matches[1])); - $this->query = ''; - $this->search_unique_record = true; - } else { - $offset = 0; - while (($pos = mb_strpos($this->query, '-', $offset)) !== false) { - $offset = $pos + 1; - if ($pos === 0) { - continue; - } - if (mb_substr($this->query, ($pos - 1), 1) !== ' ') { - $this->query = mb_substr($this->query, 0, ($pos)) . ' ' . mb_substr($this->query, $pos + 1); - } - } - - $this->query = str_ireplace(array(' ou ', ' or '), '|', $this->query); - $this->query = str_ireplace(array(' sauf ', ' except '), ' -', $this->query); - $this->query = str_ireplace(array(' and ', ' et '), ' +', $this->query); - } - - return $this; - } - - /** - * - * @param string $query - * @param int $offset - * @param int $perPage - * @return searchEngine_results - */ - public function results($query, $offset, $perPage) - { - - assert(is_int($offset)); - assert($offset >= 0); - assert(is_int($perPage)); - - $page = ceil($offset / $perPage) + 1; - - $this->current_page = $page; - $this->perPage = $perPage; - $this->offset_start = $offset; - $this->query = $query; - - $this->sphinx->SetLimits($offset, $this->perPage); - $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); - $this->parse_query(); - - $index = '*'; - - $params = phrasea::sbas_params($this->app); - - $index_keys = array(); - foreach ($params as $sbas_id => $params) { - if ( ! array_key_exists($sbas_id, $this->distinct_sbas)) - continue; - $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $params['host'], $params['port'], $params['user'], $params['dbname']))); - } - - if (count($index_keys) > 0) { - if ($this->search_in_field === false) { - $index = ''; - $found = false; - if ($this->query !== '' && $this->options->get_use_stemming()) { - if ($this->app['locale.I18n'] == 'fr') { - $index .= ', documents' . implode('_stemmed_fr, documents', $index_keys) . '_stemmed_fr'; - $found = true; - } elseif ($this->app['locale.I18n'] == 'en') { - $index .= ', documents' . implode('_stemmed_en, documents', $index_keys) . '_stemmed_en'; - $found = true; - } - } - if ( ! $found) - $index .= 'documents' . implode(', documents', $index_keys); - $index .= ', docs_realtime' . implode(', docs_realtime', $index_keys); - } else { - $index = ''; - $found = false; - if ($this->query !== '' && $this->options->get_use_stemming() && $this->app['locale.I18n'] == 'fr') { - if ($this->app['locale.I18n'] == 'fr') { - $index .= ', metadatas' . implode('_stemmed_fr, metadatas', $index_keys) . '_stemmed_fr'; - $found = true; - } elseif ($this->app['locale.I18n'] == 'en') { - $index .= ', metadatas' . implode('_stemmed_en, metadatas', $index_keys) . '_stemmed_en'; - $found = true; - } - } - if ( ! $found) - $index = 'metadatas' . implode(',metadatas', $index_keys); - $index .= ', metas_realtime' . implode(', metas_realtime', $index_keys); - } - } - - $this->current_index = $index; - - $res = $this->sphinx->Query($this->query, $this->current_index); - $results = new set_result(); - - if ($res === false) { - if ($this->sphinx->IsConnectError() === true) { - $this->error = _('Sphinx server is offline'); - } else { - $this->error = $this->sphinx->GetLastError(); - } - $this->warning = $this->sphinx->GetLastWarning(); - } else { - $this->error = $res['error']; - $this->warning = $res['warning']; - - $this->total_time = $res['time']; - $this->total_results = $res['total_found']; - $this->total_available = $res['total']; - - $courcahnum = $this->offset_start; - - if (isset($res['matches'])) { - foreach ($res['matches'] as $record_id => $match) { - try { - $record = - new record_adapter( - $this->app, - $match['attrs']['sbas_id'] - , $match['attrs']['record_id'] - , $courcahnum - ); - - $results->add_element($record); - } catch (Exception $e) { - - } - $courcahnum ++; - } - } - } - - return new searchEngine_results($results, $this); - } - - /** - * - * @param string $keyword - * @return string - */ - public function BuildTrigrams($keyword) - { - $t = "__" . $keyword . "__"; - - $trigrams = ""; - for ($i = 0; $i < strlen($t) - 2; $i ++ ) - $trigrams .= substr($t, $i, 3) . " "; - - return $trigrams; - } - -// public function get_index_suggestions($keyword) -// { -// $trigrams = $this->BuildTrigrams($keyword); -// $query = "\"$trigrams\"/1"; -// $len = strlen($keyword); -// -// $this->sphinx->SetArrayResult(true); -// -// $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); -// $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT); -// $this->sphinx->SetFilterRange("len", $len - 2, $len + 2); -// $this->sphinx->SetSelect("*, @weight+2-abs(len-$len) AS myrank"); -// $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "myrank DESC, freq DESC"); -// $this->sphinx->SetLimits(0, 10); -// -// $params = phrasea::sbas_params(); -// -// $index_keys = array(); -// foreach ($params as $sbas_id => $p) -// { -// $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname']))); -// } -// $index = 'suggest' . implode(',suggest', $index_keys); -// -// $res = $this->sphinx->Query($query, $index); -// -// if ($this->sphinx->Status() === false) -// { -// return array(); -// } -// -// if (!$res || !isset($res["matches"])) -// { -// return array(); -// } -// -// $ret = array(); -// foreach ($res["matches"] as $match) -// $ret[] = $match['attrs']['keyword']; -// -// return $ret; -// } - - protected function get_sugg_trigrams($word) - { - - $trigrams = $this->BuildTrigrams($word); - $query = "\"$trigrams\"/1"; - $len = strlen($word); - - $this->sphinx->ResetGroupBy(); - $this->sphinx->ResetFilters(); - - $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED2); - $this->sphinx->SetRankingMode(SPH_RANK_WORDCOUNT); - $this->sphinx->SetFilterRange("len", $len - 2, $len + 4); - - $this->sphinx->SetSortMode(SPH_SORT_EXTENDED, "@weight DESC"); - $this->sphinx->SetLimits(0, 10); - - $params = phrasea::sbas_params($this->app); - - $index_keys = array(); - foreach ($params as $sbas_id => $p) { - if ( ! array_key_exists($sbas_id, $this->distinct_sbas)) - continue; - $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $p['host'], $p['port'], $p['user'], $p['dbname']))); - } - $index = 'suggest' . implode(',suggest', $index_keys); - - $res = $this->sphinx->Query($query, $index); - - if ($this->sphinx->Status() === false) { - return array(); - } - - if ( ! $res || ! isset($res["matches"])) { - return array(); - } - - $this->sphinx->ResetGroupBy(); - $this->sphinx->ResetFilters(); - - $this->set_options($this->options); - - $ret = array(); - foreach ($res["matches"] as $match) - $ret[] = $match['attrs']['keyword']; - - return $ret; - } - - public function get_suggestions($I18n, $only_last_word = false) - { - if ( ! $this->current_index) - $this->current_index = '*'; - - $supposed_qry = mb_strtolower($this->query); - $pieces = explode(" ", str_replace(array("all", "last", "et", "ou", "sauf", "and", "or", "except", "in", "dans", "'", '"', "(", ")", "_", "-"), ' ', $supposed_qry)); - - $clef = 'sph_sugg_' . crc32(serialize($this->options) . ' ' . $this->current_index . implode(' ', $pieces) . ' ' . ($only_last_word ? '1' : '0')); - - try { - return $this->app['phraseanet.appbox']->get_data_from_cache($clef); - } catch (Exception $e) { - - } - - $potential_queries = array(); - - $n = 0; - - if ($only_last_word) { - $pieces = array(array_pop($pieces)); - } - - $tag = $I18n; - - $suggestions = array(); - - $total_chaines = 0; - $propal_n = $this->get_total_results(); - - if (function_exists('enchant_broker_init')) { - $r = enchant_broker_init(); - if (enchant_broker_dict_exists($r, $tag)) { - $d = enchant_broker_request_dict($r, $tag); - - foreach ($pieces as $piece) { - if (trim($piece) === '') - continue; - - $found = false; - $suggs = array($piece); - if (enchant_dict_check($d, $piece) == false) { - $suggs = array_unique(array_merge($suggs, enchant_dict_suggest($d, $piece))); - } - - $suggestions[$n] = array('original' => $piece, 'suggs' => $suggs); - - $n ++; - } - enchant_broker_free_dict($d); - } - enchant_broker_free($r); - } - - if ($only_last_word) { - foreach ($pieces as $piece) { - foreach ($this->get_sugg_trigrams($piece) as $tri_sugg) { - $suggestions[$n] = array('original' => $piece, 'suggs' => array($tri_sugg)); - $n ++; - } - } - } - - $q_todo = array($supposed_qry); - $n = 0; - - foreach ($suggestions as $suggestion) { - $tmp_qq = array(); - foreach ($suggestion['suggs'] as $sugg) { - foreach ($q_todo as $q_td) { - $tmp_qq[] = $q_td; - $tmp_data = str_replace($suggestion['original'], $sugg, $q_td); - $tmp_qq[] = $tmp_data; - } - $tmp_qq[] = str_replace($suggestion['original'], $sugg, $supposed_qry); - } - $q_todo = array_unique(array_merge($tmp_qq, array($supposed_qry))); - - $n ++; - } - - $propals = array( - array( - 'value' => $supposed_qry - , 'current' => true - , 'hits' => $this->get_total_results() - ) - ); - - foreach ($q_todo as $f) { - if ($f == $supposed_qry) - continue; - - $clef_unique_datas = 'sph_sugg_' . crc32(serialize($this->options) . $this->current_index . $f); - - try { - $datas = $this->app['phraseanet.appbox']->get_data_from_cache($clef_unique_datas); - } catch (Exception $e) { - $datas = false; - } - if (is_int($datas)) { - $found = $datas; - $cache = true; - } else { - $cache = false; - $found = 0; - - $tmp_res = $this->sphinx->Query($f, $this->current_index); - - if ($tmp_res !== false && isset($tmp_res['total_found'])) { - $found = (int) $tmp_res['total_found']; - } - $this->app['phraseanet.appbox']->set_data_to_cache($found, $clef_unique_datas, 3600); - } - - if ($found > 0) { - $propals[] = array( - 'value' => $f - , 'current' => false - , 'hits' => $found - , 'cache' => $cache - ); - } - } - - usort($propals, array('self', 'suggestions_hit_sorter')); - - $max = 0; - - foreach ($propals as $key => $prop) { - $max = max($max, $prop['hits'] * 1 / 100); - if ($prop['hits'] < $max) - unset($propals[$key]); - } - - $this->app['phraseanet.appbox']->set_data_to_cache($propals, $clef, 3600); - - return $propals; - } - - protected static function suggestions_hit_sorter($a, $b) - { - if ($a['hits'] == $b['hits']) { - return 0; - } - - return ($a['hits'] > $b['hits']) ? -1 : 1; - } - - /** - * - * @return string - */ - public function get_parsed_query() - { - return $this->query; - } - - /** - * - * @param string $query - * @param array $fields - * @param int $selected_sbas_id - * @return array - */ - public function build_excerpt($query, array $fields, record_adapter $record) - { - $selected_sbas_id = $record->get_sbas_id(); - - $index = ''; - - $params = phrasea::sbas_params($this->app); - - $index_keys = array(); - foreach ($params as $sbas_id => $params) { - if ($sbas_id != $selected_sbas_id) - continue; - $index_keys[] = crc32(str_replace(array('.', '%'), '_', sprintf('%s_%s_%s_%s', $params['host'], $params['port'], $params['user'], $params['dbname']))); - } - - if (count($index_keys) > 0) { - if ($this->search_in_field === false) { - $index = ''; - $found = false; - if ($this->options->get_use_stemming()) { - if ($this->app['locale.I18n'] == 'fr') { - $index .= 'documents' . implode('_stemmed_fr, documents', $index_keys) . '_stemmed_fr'; - $found = true; - } elseif ($this->app['locale.I18n'] == 'en') { - $index .= 'documents' . implode('_stemmed_en, documents', $index_keys) . '_stemmed_en'; - $found = true; - } - } - if ( ! $found) - $index .= 'documents' . implode(', documents', $index_keys); - } else { - $index = ''; - $found = false; - if ($this->options->get_use_stemming() && $this->app['locale.I18n'] == 'fr') { - if ($this->app['locale.I18n'] == 'fr') { - $index .= 'metadatas' . implode('_stemmed_fr, metadatas', $index_keys) . '_stemmed_fr'; - $found = true; - } elseif ($this->app['locale.I18n'] == 'en') { - $index .= 'metadatas' . implode('_stemmed_en, metadatas', $index_keys) . '_stemmed_en'; - $found = true; - } - } - if ( ! $found) - $index = 'metadatas' . implode(',metadatas', $index_keys); - } - } - $opts = array( - 'before_match' => "", - 'after_match' => "" - ); - - $fields_to_send = array(); - - foreach ($fields as $k => $f) { - $fields_to_send[$k] = $f['value']; - } - - return $this->sphinx->BuildExcerpts($fields_to_send, $index, $query, $opts); - } -} diff --git a/lib/classes/searchEngine/options.class.php b/lib/classes/searchEngine/options.class.php deleted file mode 100644 index 2c37d499f6..0000000000 --- a/lib/classes/searchEngine/options.class.php +++ /dev/null @@ -1,464 +0,0 @@ -i18n = $locale; - } - - /** - * - * @return string - */ - public function get_locale() - { - return $this->i18n; - } - - /** - * - * @param const $sort_by - * @param const $sort_ord - * @return searchEngine_options - */ - public function set_sort($sort_by, $sort_ord = self::SORT_MODE_DESC) - { - $this->sort_by = $sort_by; - $this->sort_ord = $sort_ord; - - return $this; - } - - public function set_business_fields(Array $base_ids) - { - $this->business_fields = $base_ids; - - return $this; - } - - public function get_business_fields() - { - return $this->business_fields; - } - - /** - * - * @return string - */ - public function get_sortby() - { - return $this->sort_by; - } - - /** - * - * @return string - */ - public function get_sortord() - { - return $this->sort_ord; - } - - /** - * - * @param boolean $boolean - * @return searchEngine_options - */ - public function set_use_stemming($boolean) - { - $this->stemming = ! ! $boolean; - - return $this; - } - - /** - * - * @return boolean - */ - public function get_use_stemming() - { - return $this->stemming; - } - - /** - * - * @param int $search_type - * @return searchEngine_options - */ - public function set_search_type($search_type) - { - switch ($search_type) { - case self::RECORD_RECORD: - default: - $this->search_type = self::RECORD_RECORD; - break; - case self::RECORD_GROUPING: - $this->search_type = self::RECORD_GROUPING; - break; - } - - return $this; - } - - /** - * - * @return int - */ - public function get_search_type() - { - return $this->search_type; - } - - /** - * - * @param array $base_ids - * @param ACL $ACL - * @return searchEngine_options - */ - public function set_bases(Array $base_ids, ACL $ACL) - { - foreach ($base_ids as $base_id) { - if ($ACL->has_access_to_base($base_id)) - $this->bases[$base_id] = $base_id; - } - - return $this; - } - - /** - * - * @return array - */ - public function get_bases() - { - return $this->bases; - } - - /** - * - * @param array $fields - * @return searchEngine_options - */ - public function set_fields(Array $fields) - { - $this->fields = $fields; - - return $this; - } - - /** - * - * @return array - */ - public function get_fields() - { - return $this->fields; - } - - /** - * - * @param array $status - * @return searchEngine_options - */ - public function set_status(Array $status) - { - $tmp = array(); - foreach ($status as $n => $options) { - if (count($options) > 1) - continue; - if (isset($options['on'])) { - foreach ($options['on'] as $sbas_id) - $tmp[$n][$sbas_id] = 1; - } - if (isset($options['off'])) { - foreach ($options['off'] as $sbas_id) - $tmp[$n][$sbas_id] = 0; - } - } - - $this->status = $tmp; - - return $this; - } - - /** - * - * @return array - */ - public function get_status() - { - return $this->status; - } - - /** - * - * @param string $record_type - * @return searchEngine_options - */ - public function set_record_type($record_type) - { - switch ($record_type) { - case self::TYPE_ALL: - default: - $this->record_type = self::TYPE_ALL; - break; - case self::TYPE_AUDIO: - $this->record_type = self::TYPE_AUDIO; - break; - case self::TYPE_VIDEO: - $this->record_type = self::TYPE_VIDEO; - break; - case self::TYPE_DOCUMENT: - $this->record_type = self::TYPE_DOCUMENT; - break; - case self::TYPE_FLASH: - $this->record_type = self::TYPE_FLASH; - break; - case self::TYPE_IMAGE: - $this->record_type = self::TYPE_IMAGE; - break; - } - - return $this; - } - - /** - * - * @return string - */ - public function get_record_type() - { - return $this->record_type; - } - - /** - * - * @param string $min_date - * @return searchEngine_options - */ - public function set_min_date($min_date) - { - if ( ! is_null($min_date) && trim($min_date) !== '') { - $this->date_min = DateTime::createFromFormat('Y/m/d H:i:s', $min_date . ' 00:00:00'); - } - - return $this; - } - - /** - * - * @return DateTime - */ - public function get_min_date() - { - return $this->date_min; - } - - /** - * - * @param string $max_date - * @return searchEngine_options - */ - public function set_max_date($max_date) - { - if ( ! is_null($max_date) && trim($max_date) !== '') { - $this->date_max = DateTime::createFromFormat('Y/m/d H:i:s', $max_date . ' 23:59:59'); - } - - return $this; - } - - /** - * - * @return DateTime - */ - public function get_max_date() - { - return $this->date_max; - } - - /** - * - * @param array $fields - * @return searchEngine_options - */ - public function set_date_fields(Array $fields) - { - $this->date_fields = $fields; - - return $this; - } - - /** - * - * @return array - */ - public function get_date_fields() - { - return $this->date_fields; - } - - /** - * - * @return string - */ - public function serialize() - { - $ret = array(); - foreach ($this as $key => $value) { - if ($value instanceof DateTime) - $value = $value->format('d-m-Y h:i:s'); - - $ret[$key] = $value; - } - - return p4string::jsonencode($ret); - } - - /** - * - * @param string $serialized - * @return searchEngine_options - */ - public function unserialize($serialized) - { - $serialized = json_decode($serialized); - - foreach ($serialized as $key => $value) { - if (is_null($value)) { - $value = null; - } elseif (in_array($key, array('date_min', 'date_max'))) { - $value = new DateTime($value); - } elseif ($value instanceof stdClass) { - $tmpvalue = (array) $value; - $value = array(); - - foreach ($tmpvalue as $k => $data) { - $k = ctype_digit($k) ? (int) $k : $k; - $value[$k] = $data; - } - } - - $this->$key = $value; - } - - return $this; - } -} diff --git a/lib/classes/searchEngine/results.class.php b/lib/classes/searchEngine/results.class.php deleted file mode 100644 index bafb2b74d2..0000000000 --- a/lib/classes/searchEngine/results.class.php +++ /dev/null @@ -1,144 +0,0 @@ -engine = $engine; - $this->result = $result; - - return $this; - } - - /** - * - * @return set - */ - public function get_datas() - { - return $this->result; - } - - /** - * - * @return float - */ - public function get_query_time() - { - return $this->engine->get_time(); - } - - /** - * - * @return int - */ - public function get_total_pages() - { - return $this->engine->get_total_pages(); - } - - /** - * - * @return int - */ - public function get_current_page() - { - return (int) $this->engine->get_current_page(); - } - - /** - * - * @return int - */ - public function get_count_available_results() - { - return (int) $this->engine->get_available_results(); - } - - /** - * - * @return int - */ - public function get_count_total_results() - { - return (int) $this->engine->get_total_results(); - } - - /** - * - * @return string - */ - public function get_error() - { - return $this->engine->get_error(); - } - - /** - * - * @return string - */ - public function get_warning() - { - return $this->engine->get_warning(); - } - - /** - * - * @return array - */ - public function get_suggestions($I18n) - { - return $this->engine->get_suggestions($I18n); - } - - /** - * - * @return string - */ - public function get_propositions() - { - return $this->engine->get_propositions(); - } - - /** - * - * @return string - */ - public function get_search_indexes() - { - return $this->engine->get_current_indexes(); - } -} diff --git a/lib/classes/set/result.class.php b/lib/classes/set/result.class.php deleted file mode 100644 index c19c8b4e13..0000000000 --- a/lib/classes/set/result.class.php +++ /dev/null @@ -1,29 +0,0 @@ -object = new searchEngine_options(); - } - - public function testSet_locale() - { - $locale = 'BABA'; - $this->object->set_locale($locale); - $this->assertEquals($locale, $this->object->get_locale()); - } - - public function testGet_locale() - { - $locale = null; - $this->object->set_locale($locale); - $this->assertEquals($locale, $this->object->get_locale()); - } - - public function testSet_sort() - { - $by = 'NAME'; - $sort = 'ASC'; - $this->object->set_sort($by, $sort); - $this->assertEquals($by, $this->object->get_sortby()); - $this->assertEquals($sort, $this->object->get_sortord()); - $this->object->set_sort($by); - $this->assertEquals($by, $this->object->get_sortby()); - $this->assertEquals(searchEngine_options::SORT_MODE_DESC, $this->object->get_sortord()); - } - - public function testGet_sortby() - { - $by = 'NAME'; - $sort = 'DESC'; - $this->object->set_sort($by, $sort); - $this->assertEquals($by, $this->object->get_sortby()); - $this->assertEquals($sort, $this->object->get_sortord()); - } - - public function testGet_sortord() - { - $by = 'NAME'; - $sort = 'DESC'; - $this->object->set_sort($by, $sort); - $this->assertEquals($by, $this->object->get_sortby()); - $this->assertEquals($sort, $this->object->get_sortord()); - } - - public function testSet_use_stemming() - { - $bool = true; - $this->object->set_use_stemming($bool); - $this->assertEquals($bool, $this->object->get_use_stemming()); - $bool = false; - $this->object->set_use_stemming($bool); - $this->assertEquals($bool, $this->object->get_use_stemming()); - } - - public function testGet_use_stemming() - { - $bool = true; - $this->object->set_use_stemming($bool); - $this->assertEquals($bool, $this->object->get_use_stemming()); - $bool = false; - $this->object->set_use_stemming($bool); - $this->assertEquals($bool, $this->object->get_use_stemming()); - } - - public function testSet_search_type() - { - $type = "caca"; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_RECORD, $this->object->get_search_type()); - $type = searchEngine_options::RECORD_RECORD; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_RECORD, $this->object->get_search_type()); - $type = searchEngine_options::RECORD_GROUPING; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_GROUPING, $this->object->get_search_type()); - } - - public function testGet_search_type() - { - $type = "caca"; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_RECORD, $this->object->get_search_type()); - $type = searchEngine_options::RECORD_RECORD; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_RECORD, $this->object->get_search_type()); - $type = searchEngine_options::RECORD_GROUPING; - $this->object->set_search_type($type); - $this->assertEquals(searchEngine_options::RECORD_GROUPING, $this->object->get_search_type()); - } - - public function testSet_bases() - { - $bases = array_keys(self::$DI['user']->ACL()->get_granted_base()); - $this->object->set_bases($bases, self::$DI['user']->ACL()); - $this->assertEquals(array_values($bases), array_values($this->object->get_bases())); - } - - public function testGet_bases() - { - $bases = array_keys(self::$DI['user']->ACL()->get_granted_base()); - $this->object->set_bases($bases, self::$DI['user']->ACL()); - $this->assertEquals(array_values($bases), array_values($this->object->get_bases())); - } - - public function testSet_fields() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_fields() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSet_status() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_status() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSet_record_type() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_record_type() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSet_min_date() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_min_date() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSet_max_date() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_max_date() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSet_date_fields() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testGet_date_fields() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } - - public function testSerialize() - { - $bases = array_keys(self::$DI['user']->ACL()->get_granted_base()); - $this->object->set_bases($bases, self::$DI['user']->ACL()); - $this->object->set_date_fields(array()); - $this->object->set_locale('fr_FR'); - $this->object->set_max_date(null); - $this->object->set_min_date(null); - $this->object->set_record_type(searchEngine_options::TYPE_AUDIO); - $this->object->set_search_type(searchEngine_options::RECORD_RECORD); - $this->object->set_sort('Name', 'DESC'); - $this->object->set_status(array()); - $this->object->set_use_stemming(true); - $this->assertEquals($this->object, unserialize(serialize($this->object))); - } - - public function testUnserialize() - { - // Remove the following lines when you implement this test. - $this->markTestIncomplete( - 'This test has not been implemented yet.' - ); - } -}