array("NODETYPE"=>PHRASEA_OP_AND, "CANNUM"=>false), "and" => array("NODETYPE"=>PHRASEA_OP_AND, "CANNUM"=>false), "ou" => array("NODETYPE"=>PHRASEA_OP_OR, "CANNUM"=>false), "or" => array("NODETYPE"=>PHRASEA_OP_OR, "CANNUM"=>false), "sauf" => array("NODETYPE"=>PHRASEA_OP_EXCEPT, "CANNUM"=>false), "except" => array("NODETYPE"=>PHRASEA_OP_EXCEPT, "CANNUM"=>false), "pres" => array("NODETYPE"=>PHRASEA_OP_NEAR, "CANNUM"=>true ), "near" => array("NODETYPE"=>PHRASEA_OP_NEAR, "CANNUM"=>true ), "avant" => array("NODETYPE"=>PHRASEA_OP_BEFORE, "CANNUM"=>true ), "before" => array("NODETYPE"=>PHRASEA_OP_BEFORE, "CANNUM"=>true ), "apres" => array("NODETYPE"=>PHRASEA_OP_AFTER, "CANNUM"=>true ), "after" => array("NODETYPE"=>PHRASEA_OP_AFTER, "CANNUM"=>true ), "dans" => array("NODETYPE"=>PHRASEA_OP_IN, "CANNUM"=>false), "in" => array("NODETYPE"=>PHRASEA_OP_IN, "CANNUM"=>false) ); var $opk = array( "<" => array("NODETYPE"=>PHRASEA_OP_LT, "CANNUM"=>false), ">" => array("NODETYPE"=>PHRASEA_OP_GT, "CANNUM"=>false), "<=" => array("NODETYPE"=>PHRASEA_OP_LEQT, "CANNUM"=>false), ">=" => array("NODETYPE"=>PHRASEA_OP_GEQT, "CANNUM"=>false), "<>" => array("NODETYPE"=>PHRASEA_OP_NOTEQU, "CANNUM"=>false), "=" => array("NODETYPE"=>PHRASEA_OP_EQUAL, "CANNUM"=>false), ":" => array("NODETYPE"=>PHRASEA_OP_COLON, "CANNUM"=>false) ); var $spw = array( "all" => array("CLASS"=>"PHRASEA_KW_ALL", "NODETYPE"=>PHRASEA_KW_ALL, "CANNUM"=>false), "last" => array("CLASS"=>"PHRASEA_KW_LAST", "NODETYPE"=>PHRASEA_KW_LAST, "CANNUM"=>true), // "first" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), // "premiers" => array("CLASS"=>PHRASEA_KW_FIRST, "CANNUM"=>true), "tout" => array("CLASS"=>"PHRASEA_KW_ALL", "NODETYPE"=>PHRASEA_KW_ALL, "CANNUM"=>false), "derniers" => array("CLASS"=>"PHRASEA_KW_LAST", "NODETYPE"=>PHRASEA_KW_LAST, "CANNUM"=>true) ); var $quoted_defaultop = array("VALUE"=>"default_avant", "NODETYPE"=>PHRASEA_OP_BEFORE, "PNUM"=>0 ); var $defaultop = array("VALUE"=>"and", "NODETYPE"=>PHRASEA_OP_AND, 
"PNUM"=>NULL ); var $defaultlast = 12; var $phq; // var $noaccent = array(); // var $str_in = "àáâãäåçèéêëìíîïðñòóôõöùúûüýÿøæå"; // var $str_out = "aaaaaaceeeeiiiionooooouuuuyyo@a"; var $errmsg =""; var $debug = false; // var $proposals = Array("QRY"=>"","TERMS"=>array()); // un tableau qui contiendra des propositions de thesaurus pour les termes de l'arbre simple var $proposals = Array("QRY"=>"","BASES"=>array()); // un tableau qui contiendra des propositions de thesaurus pour les termes de l'arbre simple var $lng = null; // le skin, utile pour afficher le thesaurus dans la bonne langue // le constructor function qparser($lng = "???") { $this->lng = $lng; // $l = strlen($this->str_in); // for($i=0; $i<$l; $i++) // $this->noaccent[substr($this->str_in, $i, 1)] = substr($this->str_out, $i, 1); // $this->debug = true; } // simulation du trim manquant dans mb_xxxx, ne fonctionne qu'en utf8 function mb_trim($s, $encoding) { return(trim($s)); } function mb_ltrim($s, $encoding) { return(ltrim($s)); } function parsequery($phq) { // $phq = utf8_encode($phq); // $phq0 = $phq; // $phq = ""; // for($i=0; $idebug) { for($i=0; $i\n\n"); // $this->proposals = Array("QRY"=>"","TERMS"=>array()); $this->proposals = Array("QRY"=>"","BASES"=>array()); $this->phq = $this->mb_trim($phq, 'UTF-8'); if($this->phq != "") return($this->maketree(0)); else { if($this->errmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= _('qparser::la question est vide'); return(null); } } function astext($tree) { switch($tree["CLASS"]) { case "SIMPLE": if(is_array($tree["VALUE"])) return(implode(" ", $tree["VALUE"])); else return($tree["VALUE"]); break; case "QSIMPLE": if(is_array($tree["VALUE"])) return("\"" . implode(" ", $tree["VALUE"]) . "\""); else return("\"" . $tree["VALUE"] . "\""); break; case "PHRASEA_KW_ALL": return($tree["VALUE"][0]); break; case "PHRASEA_KW_LAST": if($tree["PNUM"]!==null) return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . 
"]"); else return($tree["VALUE"][0]); break; case "OPS": case "OPK": if(isset($tree["PNUM"])) return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext($tree["RB"]) . ")"); else return("(" . $this->astext($tree["LB"]) . " " . $tree["VALUE"] . " " . $this->astext($tree["RB"]) . ")"); break; } } function astable(&$tree) { $this->calc_complexity($tree); // var_dump($tree); $txt = ""; $this->astable2($txt, $tree); $txt = "\n\n" . $txt . "\n
\n"; return($txt); } function calc_complexity(&$tree) { if($tree) { if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") return($tree["COMPLEXITY"] = $this->calc_complexity($tree["LB"]) + $this->calc_complexity($tree["RB"])); else return($tree["COMPLEXITY"] = 1); } } function astable2(&$out, &$tree, $depth=0) { switch($tree["CLASS"]) { case "SIMPLE": if(is_array($tree["VALUE"])) $txt = implode(" ", $tree["VALUE"]); else $txt = $tree["VALUE"]; $out .= "\t".$txt."\n"; break; case "QSIMPLE": if(is_array($tree["VALUE"])) $txt = implode(" ", $tree["VALUE"]); else $txt = $tree["VALUE"]; $out .= "\t"".$txt.""\n"; break; case "PHRASEA_KW_ALL": $out .= "\t".$tree["VALUE"][0]."\n"; break; case "PHRASEA_KW_LAST": if($tree["PNUM"]!==null) $out .= "\t".$tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"."\n"; else $out .= "\t".$tree["VALUE"][0]."\n"; break; case "OPS": case "OPK": $op = $tree["VALUE"]; if(isset($tree["PNUM"])) $op .= "[" . $tree["PNUM"] . "]"; $out .= "\t$op\n"; $this->astable2($out, $tree["LB"], $depth+1); $this->astable2($out, $tree["RB"], $depth+1); $out .= "\n\n"; break; } } function dumpDiv(&$tree) { print("
\n"); $this->dumpDiv2($tree); print("
\n"); } function dumpDiv2(&$tree, $depth=0) { switch($tree["CLASS"]) { case "SIMPLE": if(is_array($tree["VALUE"])) $s = implode(" , ", $tree["VALUE"]); else $s = $tree["VALUE"]; print(str_repeat("\t", $depth) . "" . $s . "\n"); case "QSIMPLE": $s = ""; if(is_array($tree["VALUE"])) $s = implode(" , ", $tree["VALUE"]); else $s = $tree["VALUE"]; print(str_repeat("\t", $depth) . """ . $s . ""\n"); break; case "PHRASEA_KW_ALL": printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); break; case "PHRASEA_KW_LAST": if($tree["PNUM"]!==null) printf(str_repeat("\t", $depth) . "%s %s\n", $tree["VALUE"][0], $tree["PNUM"]); else printf(str_repeat("\t", $depth) . "%s\n", $tree["VALUE"][0]); break; // case PHRASEA_KW_FIRST: // if($tree["PNUM"]!==null) // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); // else // printf("%s", $tree["VALUE"]); // break; case "OPS": case "OPK": print(str_repeat("\t", $depth) . "
\n"); $this->dumpDiv2($tree["LB"], $depth+1); print(str_repeat("\t", $depth) . "
\n"); print(str_repeat("\t", $depth) . "
\n"); if(isset($tree["PNUM"])) printf(str_repeat("\t", $depth+1) . " %s[%s]\n", $tree["VALUE"], $tree["PNUM"]); else printf(str_repeat("\t", $depth+1) . " %s\n", $tree["VALUE"]); print(str_repeat("\t", $depth) . "
\n"); print(str_repeat("\t", $depth) . "
\n"); $this->dumpDiv2($tree["RB"], $depth+1); print(str_repeat("\t", $depth) . "
\n"); break; } } function dump($tree) { switch($tree["CLASS"]) { case "SIMPLE": if(is_array($tree["VALUE"])) $s = implode("
, ", $tree["VALUE"]); else $s = $tree["VALUE"]; print("" . $s . ""); break; case "QSIMPLE": if(is_array($tree["VALUE"])) $s = implode(" , ", $tree["VALUE"]); else $s = $tree["VALUE"]; print(""" . $s . """); break; case "PHRASEA_KW_ALL": printf("%s", $tree["VALUE"][0]); break; case "PHRASEA_KW_LAST": if($tree["PNUM"]!==null) printf("%s %s", $tree["VALUE"][0], $tree["PNUM"]); else printf("%s", $tree["VALUE"][0]); break; // case PHRASEA_KW_FIRST: // if($tree["PNUM"]!==null) // printf("%s %s", $tree["VALUE"], $tree["PNUM"]); // else // printf("%s", $tree["VALUE"]); // break; case "OPS": case "OPK": print(""); print(""); print(""); print(""); print(""); print(""); print(""); print(""); print("
"); if(isset($tree["PNUM"])) printf(" %s[%s] ", $tree["VALUE"], $tree["PNUM"]); else printf(" %s ", $tree["VALUE"]); print("
"); print($this->dump($tree["LB"])); print(""); print($this->dump($tree["RB"])); print("
"); break; } } function priority_opk(&$tree, $depth=0) { if(!$tree) return; if($tree["CLASS"]=="OPK" && ($tree["LB"]["CLASS"]=="OPS" || $tree["LB"]["CLASS"]=="OPK")) { // on a un truc du genre ((a ou b) < 5), on le transforme en (a ou (b < 5)) $t = $tree["LB"]; $tree["LB"] = $t["RB"]; $t["RB"] = $tree; $tree = $t; } if(isset($tree["LB"])) $this->priority_opk($tree["LB"], $depth+1); if(isset($tree["RB"])) $this->priority_opk($tree["RB"], $depth+1); } function distrib_opk(&$tree, $depth=0) { if(!$tree) return; if($tree["CLASS"]=="OPK" && ($tree["RB"]["CLASS"]=="OPS")) { // on a un truc du genre (a = (5 ou 6)), on le transforme en ((a = 5) ou (a = 6)) $tmp = array("CLASS"=>$tree["CLASS"], "NODETYPE"=>$tree["NODETYPE"], "VALUE"=>$tree["VALUE"], "PNUM"=>$tree["PNUM"], "LB"=>$tree["LB"], "RB"=>$tree["RB"]["RB"], "DEPTH"=>$tree["LB"]["DEPTH"] ); $t = $tree["RB"]; $tree["RB"] = $t["LB"]; $t["LB"] = $tree; $t["RB"] = $tmp; $tree = $t; } if(isset($tree["LB"])) $this->distrib_opk($tree["LB"], $depth+1); if(isset($tree["RB"])) $this->distrib_opk($tree["RB"], $depth+1); } function thesaurus2_apply(&$tree, $bid) { if(!$tree) return; if( ($tree["CLASS"]=="SIMPLE" || $tree["CLASS"]=="QSIMPLE") && isset($tree["SREF"]) && isset($tree["SREF"]["TIDS"]) ) { $tids = array(); foreach($tree["SREF"]["TIDS"] as $tid) { if($tid["bid"]==$bid) $tids[] = $tid["pid"]; } if(count($tids)>=1) { /* if(count($tids)==1) { // on cherche un id simple, on utilisera la syntaxe sql 'like' (l'extension repérera elle méme la syntaxe car la value finit par '%') $val = str_replace(".", "d", $tids[0]) . "d%"; $tree["VALUE"] = array($val); } else { // on cherche plusieurs id's, on utilisera la syntaxe 'regexp' (l'extension repérera elle méme la syntaxe car la value finit par '$' $val = ""; foreach($tids as $tid) $val .= ($val?"|":"") . "(" . str_replace(".", "d", $tid) . "d.*)"; $tree["VALUE"] = array("^" . 
$val); } */ $tree["VALUE"] = array(); foreach($tids as $tid) $tree["VALUE"][] = str_replace(".", "d", $tid) . "d%";; } else { // le mot n'est pas dans le thesaurus } /* */ } if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { $this->thesaurus2_apply($tree["LB"], $bid); $this->thesaurus2_apply($tree["RB"], $bid); } }

	/**
	 * Extends (or replaces) the full-text search on simple terms with a thesaurus search.
	 *
	 * Example for the query (a et b):
	 *   full-text only         ==> (a et b)
	 *   thesaurus only         ==> ((th:a) et (th:b))
	 *   full-text + thesaurus  ==> ((a ou (th:a)) et (b ou (th:b)))
	 *
	 * The extended tree is built in a copy, which is returned. The source tree keeps
	 * its shape but IS annotated in place: terms receive a "PATH" key and
	 * 'field:value' nodes may receive a "CONTEXT" key. The textual form of the
	 * annotated source tree is stored in $this->proposals["QRY"].
	 *
	 * @param array $tree         parsed query tree (annotated in place)
	 * @param bool  $useFullText  keep the full-text search next to the thesaurus one
	 * @param bool  $useThesaurus when false the returned copy is left untouched
	 * @param bool  $keepfuzzy    when false, terms without context get the context "*"
	 * @return array the extended copy of the tree
	 */
	function extendThesaurusOnTerms(&$tree, $useFullText, $useThesaurus, $keepfuzzy)
	{
		$copy = $tree;
		$this->_extendThesaurusOnTerms($tree, $copy, $useFullText, $useThesaurus, $keepfuzzy, 0, "");

		// NOTE(review): the empty string operands look like leftovers of wrapper markup — confirm against the original file
		$this->proposals["QRY"] = "" . $this->_queryAsHTML($tree) . "";

		return($copy);
	}

	/**
	 * Recursive worker of extendThesaurusOnTerms().
	 *
	 * $tree and $copy are walked in parallel; $path records the way from the root
	 * ("L" / "R" appended at each level) and is stored on the terms that are reached.
	 *
	 * @param array  $tree   node of the source tree
	 * @param array  $copy   matching node of the copy, replaced when it is a simple term
	 * @param bool   $useFullText
	 * @param bool   $useThesaurus
	 * @param bool   $keepfuzzy
	 * @param int    $depth  recursion depth (0 at the root)
	 * @param string $path   path of this node from the root
	 */
	function _extendThesaurusOnTerms(&$tree, &$copy, $useFullText, $useThesaurus, $keepfuzzy, $depth, $path)
	{
		if(!$useThesaurus)
			return;		// full-text only: nothing to change

		if($tree["CLASS"]=="SIMPLE" || $tree["CLASS"]=="QSIMPLE")
		{
			// a term that carries its own context is never kept as a full-text search
			$keepFullText = isset($tree["CONTEXT"]) ? false : $useFullText;
			$copy = $this->_extendToThesaurus_Simple($tree, $keepFullText, $keepfuzzy, $path);
			return;
		}

		if($tree["CLASS"]=="OPK" && $tree["NODETYPE"]==PHRASEA_OP_COLON)
		{
			// 'field:value' : only 'value' (the right branch) is handled
			$tree["RB"]["PATH"] = $copy["RB"]["PATH"] = $path."R";
			if(isset($tree["RB"]["CONTEXT"]))
				$copy["CONTEXT"] = $tree["CONTEXT"] = $tree["RB"]["CONTEXT"];
			else if(!$keepfuzzy)
				$copy["CONTEXT"] = $tree["CONTEXT"] = "*";
			// the copy keeps a reference to the term of the source tree
			$copy["RB"]["SREF"] = &$tree["RB"];
			return;
		}

		// only the branches of 'ET', 'OU' and 'SAUF' are descended into
		if($tree["CLASS"]=="OPS" && ($tree["NODETYPE"]==PHRASEA_OP_AND || $tree["NODETYPE"]==PHRASEA_OP_OR || $tree["NODETYPE"]==PHRASEA_OP_EXCEPT))
		{
			$this->_extendThesaurusOnTerms($tree["LB"], $copy["LB"], $useFullText, $useThesaurus, $keepfuzzy, $depth+1, $path."L");
			$this->_extendThesaurusOnTerms($tree["RB"], $copy["RB"], $useFullText, $useThesaurus, $keepfuzzy, $depth+1, $path."R");
		}
	}

	/**
	 * Extends (or replaces) a full-text term with a thesaurus search on the whole
	 * thesaurus (field '*'). The context of the term, if any, is moved to the ':' operator.
	 * Example: a[k] ==> (a ou (TH :[k] a))
	 *
	 * Side effect: $simple["PATH"] is set to $path.
	 *
	 * @param array  $simple       the SIMPLE / QSIMPLE node of the source tree
	 * @param bool   $keepFullText true: build (term OR *:term) ; false: build (*:term) only
	 * @param bool   $keepfuzzy    when false and the term has no context, the context "*" is used
	 * @param string $path         path of the term in the source tree
	 * @return array the node that replaces the term
	 */
	function _extendToThesaurus_Simple(&$simple, $keepFullText, $keepfuzzy, $path)
	{
		$simple["PATH"] = $path;

		$context = null;
		if(isset($simple["CONTEXT"]))
			$context = $simple["CONTEXT"];

		if($keepFullText)
		{
			// OR between the full-text search and the thesaurus search
			$tmp = array("CLASS"=>"OPS",
						 "NODETYPE"=>PHRASEA_OP_OR,
						 "VALUE"=>"OR",
						 "PNUM"=>null,
						 "DEPTH"=>$simple["DEPTH"],
						 "LB"=>$simple,
						 "RB"=>array("CLASS"=>"OPK",
									 "NODETYPE"=>PHRASEA_OP_COLON,
									 "VALUE"=>":",
									 "PNUM"=>null,
									 "DEPTH"=>$simple["DEPTH"]+1,
									 "LB"=>array("CLASS"=>"SIMPLE",
												 "NODETYPE"=>PHRASEA_KEYLIST,
												 "VALUE"=>array("*"),
												 "DEPTH"=>$simple["DEPTH"]+2
												),
									 "RB"=>$simple
									)
						);

			// the full-text side carries no context
			unset($tmp["LB"]["CONTEXT"]);

			// the context, if any, goes on the ':' operator
			if($context !== null)
				$tmp["RB"]["CONTEXT"] = $context;
			else if(!$keepfuzzy)
				$tmp["RB"]["CONTEXT"] = "*";

			// fix the depth of the two copies of the original term
			$tmp["LB"]["DEPTH"] += 1;
			$tmp["RB"]["RB"]["DEPTH"] += 2;

			// keep a reference to the original term
			$tmp["RB"]["RB"]["SREF"] = &$simple;
			$tmp["RB"]["RB"]["PATH"] = $path;
		}
		else
		{
			// the full-text search is replaced by the thesaurus search
			$tmp = array("CLASS"=>"OPK",
						 "NODETYPE"=>PHRASEA_OP_COLON,
						 "VALUE"=>":",
						 "PNUM"=>null,
						 "DEPTH"=>$simple["DEPTH"]+1,
						 "LB"=>array("CLASS"=>"SIMPLE",
									 "NODETYPE"=>PHRASEA_KEYLIST,
									 "VALUE"=>array("*"),
									 "DEPTH"=>$simple["DEPTH"]+1
									),
						 "RB"=>$simple
						);

			// the context, if any, goes on the ':' operator
			if($context !== null)
				$tmp["CONTEXT"] = $context;
			else if(!$keepfuzzy)
				$tmp["CONTEXT"] = "*";

			// fix the depth of the copy of the original term
			$tmp["RB"]["DEPTH"] += 1;

			// keep a reference to the original term
			$tmp["RB"]["SREF"] = &$simple;
			$tmp["RB"]["PATH"] = $path;
		}

		return($tmp);
	}

	/**
	 * Looks up in the thesaurus every 'field:value' node found under $tree.
	 *
	 * At depth 0 the proposals entry of the base ("b$bid") is (re)initialised.
	 * A ':' node is handed to setTids(); for the other operators both branches are walked.
	 *
	 * @param array       $tree   query tree (extended form)
	 * @param mixed       $bid    id of the base, used as key "b$bid" in the proposals
	 * @param string      $name   name of the base, stored in the proposals
	 * @param DOMDocument $domthe thesaurus of the base
	 * @param bool        $searchsynonyms passed through to setTids()
	 * @param int         $depth  recursion depth
	 * @return int sum of the values returned by setTids()
	 */
	function thesaurus2(&$tree, $bid, $name, &$domthe, $searchsynonyms=true, $depth=0)
	{
		if($this->debug)
			print("thesaurus2:\n\$tree=" . var_export($tree, true) . "\n");

		if($depth==0)
			$this->proposals["BASES"]["b$bid"] = array("BID"=>$bid, "NAME"=>$name, "TERMS"=>array());

		if(!$tree)
			return(0);

		if($tree["CLASS"]=="OPK" && $tree["NODETYPE"]==PHRASEA_OP_COLON)
			return($this->setTids($tree, $bid, $domthe, $searchsynonyms));

		$ambigus = 0;
		if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK")
		{
			$ambigus += $this->thesaurus2($tree["LB"], $bid, $name, $domthe, $searchsynonyms, $depth+1);
			$ambigus += $this->thesaurus2($tree["RB"], $bid, $name, $domthe, $searchsynonyms, $depth+1);
		}
		return($ambigus);
	}

function propAsHTML(&$node, &$html, $path, $depth=0) { global $parm; if($depth > 0) { $tsy = array(); $lngfound = "?"; for($n=$node->firstChild; $n; $n=$n->nextSibling) { if($n->nodeName=="sy") { $lng = $n->getAttribute("lng"); if(!array_key_exists($lng, $tsy)) $tsy[$lng] = array(); $zsy = array("v"=>$n->getAttribute("v"), "w"=>$n->getAttribute("w"), "k"=>$n->getAttribute("k")); if($lngfound=="?" || ($lng==$this->lng && $lngfound!=$lng)) { $lngfound = $lng; $syfound = $zsy; } else { } $tsy[$lng][] = $zsy; } } $alt = ""; foreach($tsy as $lng=>$tsy2) { foreach($tsy2 as $sy) { $alt .= $alt ? "\n" : ""; $alt .= "" . $lng . ": " . 
p4string::MakeString($sy["v"], "js"); } } $thtml = $syfound["v"]; $kjs = $syfound["k"] ? ("'" . p4string::MakeString($syfound["k"], "js") . "'") : "null"; $wjs = "'" . p4string::MakeString($syfound["w"], "js") . "'"; if($node->getAttribute("term")) { $thtml = "" . $thtml . ""; $node->removeAttribute("term"); } $tab = str_repeat("\t", $depth); $html .= $tab."
\n"; $html .= $tab."\t" . $thtml . "\n"; } $tsort = array(); for($n=$node->firstChild; $n; $n=$n->nextSibling) { if($n->nodeType==XML_ELEMENT_NODE && $n->getAttribute("marked")) // only 'te' marked { $lngfound = '?'; $syfound = '?'; for($n2=$n->firstChild; $n2; $n2=$n2->nextSibling) { if($n2->nodeName=='sy') { $lng = $n2->getAttribute('lng'); if($lngfound=="?" || ($lng==$this->lng && $lngfound!=$lng)) { $lngfound = $lng; $syfound = $n2->getAttribute('w'); } } } $n->removeAttribute("marked"); for($i=0; array_key_exists($syfound.$i, $tsort) && $i<9999; $i++) ; $tsort[$syfound.$i] = $n; } } ksort($tsort); // var_dump($tsort); foreach($tsort as $n) { $this->propAsHTML($n, $html, $path, $depth+1); } if($depth > 0) $html .= $tab."
\n"; }

	/**
	 * Returns the textual form of a query tree.
	 *
	 * Terms are joined with spaces; a context is appended between brackets; terms
	 * that carry a "PATH" key and QSIMPLE terms are wrapped in double quotes;
	 * operators are rendered as "(left op right)", with "[PNUM]" after the operator
	 * when PNUM is set.
	 * NOTE(review): despite the name, nothing is HTML-escaped here — presumably the
	 * caller handles it; confirm.
	 *
	 * @param array $tree  node to render
	 * @param int   $depth recursion depth
	 * @return string|null null when the class of the node is not handled
	 */
	function _queryAsHTML($tree, $depth=0)
	{
		switch($tree["CLASS"])
		{
			case "SIMPLE":
			case "QSIMPLE":
				$w = is_array($tree["VALUE"]) ? implode(' ', $tree["VALUE"]) : $tree["VALUE"];
				if(isset($tree["PATH"]))
				{
					if(isset($tree["CONTEXT"]))
						$w .= ' [' . $tree["CONTEXT"] . ']';
					return('"'.$w.'"');
				}
				if(isset($tree["CONTEXT"]))
					$w .= '[' . $tree["CONTEXT"] . ']';
				return($tree["CLASS"] == "QSIMPLE" ? '"' . $w . '"' : $w);

			case "PHRASEA_KW_ALL":
				return($tree["VALUE"][0]);

			case "PHRASEA_KW_LAST":
				if($tree["PNUM"]!==null)
					return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]");
				return($tree["VALUE"][0]);

			case "OPS":
			case "OPK":
				$op = $tree["VALUE"];
				if(isset($tree["PNUM"]))
					$op .= '[' . $tree["PNUM"] . ']';
				return('(' . $this->_queryAsHTML($tree["LB"], $depth+1) . ' ' . $op . ' ' . $this->_queryAsHTML($tree["RB"], $depth+1) . ')');
		}
	}

	/**
	 * Quotes a string as an XPath 1.0 string literal.
	 *
	 * XPath 1.0 literals have no escape sequence: a value holding one kind of
	 * quote is delimited with the other kind, a value holding both is rebuilt
	 * with concat(). Values without a double quote give exactly "value".
	 *
	 * @param string $s raw value
	 * @return string XPath expression evaluating to $s
	 */
	function _xpathLiteral($s)
	{
		$s = (string)$s;
		if(strpos($s, '"') === false)
			return('"' . $s . '"');
		if(strpos($s, "'") === false)
			return("'" . $s . "'");
		return('concat("' . str_replace('"', '", \'"\', "', $s) . '")');
	}

	/**
	 * Searches the thesaurus for the value of a 'field:value' node and records the
	 * matching ids on the original term ($tree["RB"]["SREF"]["TIDS"]).
	 *
	 * The 'sy' nodes are matched on @w; the "CONTEXT" key of the ':' node selects
	 * the filter on @k: empty context = no @k, "*" = any, other = that exact @k.
	 * When at least one node matches, the HTML of the proposals is rebuilt with
	 * propAsHTML() and stored under the path of the term.
	 *
	 * @param array       $tree   the ':' node (its right branch is the searched term)
	 * @param mixed       $bid    id of the base
	 * @param DOMDocument $domthe thesaurus of the base
	 * @param bool        $searchsynonyms not used by this method
	 * @return int number of matching nodes when several match and the node has no
	 *             "CONTEXT" key, else 0
	 */
	function setTids(&$tree, $bid, &$domthe, $searchsynonyms)
	{
		if($this->debug)
			print("============================ setTids:\n\$tree=" . var_export($tree, true) . "\n");

		$ambigus = 0;

		$w = $tree["RB"]["VALUE"];
		if(is_array($w))
			$w = implode(" ", $w);

		// values are quoted as XPath literals so that a quote typed by the user cannot break the expression
		$x0 = "@w=" . $this->_xpathLiteral($w);
		if(isset($tree["CONTEXT"]))
		{
			if(!$tree["CONTEXT"])
				$x0 .= " and not(@k)";
			else if($tree["CONTEXT"] != "*")
				$x0 .= " and @k=" . $this->_xpathLiteral($tree["CONTEXT"]);
		}
		$x = "/thesaurus//sy[" . $x0 ."]";

		if($this->debug)
			printf("searching thesaurus with xpath='%s'\n\n", $x);

		$dxp = new DOMXPath($domthe);
		$nodes = $dxp->query($x);

		if(!isset($tree["RB"]["SREF"]["TIDS"]))
			$tree["RB"]["SREF"]["TIDS"] = array();

		// query() yields false on an invalid expression: treated like "term not found"
		if($nodes !== false && $nodes->length >= 1)
		{
			// several matches without a context to choose between them are ambiguous
			$ambiguous = ($nodes->length > 1) && !isset($tree["CONTEXT"]);
			foreach($nodes as $node)
			{
				if($ambiguous)
					$ambigus++;
				$this->addtoTIDS($tree["RB"], $bid, $node);
			}

			$path = $tree["RB"]["SREF"]["PATH"];
			$prophtml = "";
			$this->propAsHTML($domthe->documentElement, $prophtml, $path);
			$this->proposals["BASES"]["b$bid"]["TERMS"][$path]["HTML"] = $prophtml;
		}
		// else: the word is not in the thesaurus

		return($ambigus);
	}

	/**
	 * Tells whether the tree holds a ':' operator whose right branch is a simple term.
	 *
	 * @param array $tree node to inspect
	 * @return bool
	 */
	function containsColonOperator(&$tree)
	{
		if(!$tree)
			return(false);

		if($tree["CLASS"]=="OPK" && $tree["NODETYPE"]==PHRASEA_OP_COLON && ($tree["RB"]["CLASS"]=="SIMPLE" || $tree["RB"]["CLASS"]=="QSIMPLE"))
			return(true);

		if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK")
			return($this->containsColonOperator($tree["LB"]) || $this->containsColonOperator($tree["RB"]));

		return(false);
	}

	/**
	 * Adds the thesaurus entry of a matching 'sy' node to the TIDS of a term,
	 * skipping an entry already recorded for the same base and the same parent id,
	 * then marks the thesaurus DOM for the rendering of the proposals.
	 *
	 * @param array      $extendednode term of the extended tree; results go to $extendednode["SREF"]["TIDS"]
	 * @param mixed      $bid          id of the base
	 * @param DOMElement $DOMnode      the matching 'sy' node
	 */
	function addtoTIDS(&$extendednode, $bid, $DOMnode)
	{
		$te  = $DOMnode->parentNode;
		$id  = $DOMnode->getAttribute("id");
		$pid = $te->getAttribute("id");
		$lng = $DOMnode->getAttribute("lng");
		$w   = $DOMnode->getAttribute("w");
		$k   = $DOMnode->getAttribute("k");
		$p   = $te->getAttribute("v");	// the general (parent) term of the searched term: useful to resolve ambiguities

		$path = $extendednode["SREF"]["PATH"];

		if($this->debug)
			printf("found node id='%s', v='%s' w='%s', k='%s', p='%s' for node-path=%s \n", $id, $DOMnode->getAttribute("v"), $w, $k, $p, $path);

		if(!$k)
			$k = null;

		$found = false;
		foreach($extendednode["SREF"]["TIDS"] as $ztid)
		{
			if($ztid["bid"] == $bid && $ztid["pid"] == $pid)
			{
				$found = true;
				break;
			}
		}
		if(!$found)
			$extendednode["SREF"]["TIDS"][] = array("bid"=>$bid, "pid"=>$pid, "id"=>$id, "w"=>$w, "k"=>$k, "lng"=>$lng, "p"=>$p);

		// register the term in the proposals of this base
		if(!isset($this->proposals["BASES"]["b$bid"]["TERMS"][$path]))
		{
			// VALUE may be a list of words or a plain string (setTids() handles both for the same node)
			$term = is_array($extendednode["VALUE"]) ? implode(" ", $extendednode["VALUE"]) : $extendednode["VALUE"];
			if(isset($extendednode["CONTEXT"]) && $extendednode["CONTEXT"])
				$term .= " (" . $extendednode["CONTEXT"] . ")";
			$this->proposals["BASES"]["b$bid"]["TERMS"][$path] = array("TERM"=>$term);
		}

		// mark the main term
		$te->setAttribute("term", "1");

		// mark its direct 'te' children ($DOMnode is a 'sy', so they are its siblings)
		for($node=$te->firstChild; $node; $node=$node->nextSibling)
		{
			if($node->nodeName=="te")
				$node->setAttribute("marked", "1");
		}

		// then mark every ancestor, up to the first element without an id
		for($node=$te; $node && $node->nodeType==XML_ELEMENT_NODE && $node->parentNode; $node=$node->parentNode)
		{
			if(!$node->getAttribute("id"))
				break;		// went past the root of the thesaurus
			$node->setAttribute("marked", "1");
		}
	}

function astext_ambigu($tree, &$ambiguites, $mouseCallback="void", $depth=0) { // printf("astext : "); // var_dump($tree); if($depth==0) { $ambiguites = array("n"=>0, "refs"=>array()); } switch($tree["CLASS"]) { case "SIMPLE": case "QSIMPLE": $prelink = $postlink = ""; $w = is_array($tree["VALUE"]) ? implode(" ", $tree["VALUE"]) : $tree["VALUE"]; $tab = "\n" . str_repeat("\t", $depth); if(isset($tree["TIDS"]) && count($tree["TIDS"])>1) { $ambiguites["refs"][$n=$ambiguites["n"]] = &$tree; $txt = $tab . ""; $txt .= $tab . "\t\"" . $w . ""; $txt .= $tab . "\t\""; $txt .= $tab . "\n"; $ambiguites["n"]++; } else { if(isset($tree["CONTEXT"])) $w .= "[" . $tree["CONTEXT"] . "]"; if($tree["CLASS"] == "QSIMPLE") $txt = $tab . "\"" . $w . "\"\n"; else $txt = $tab . "" . $w . "\n"; } return($txt); break; case "PHRASEA_KW_ALL": return($tree["VALUE"][0]); break; case "PHRASEA_KW_LAST": if($tree["PNUM"]!==null) return("" . $tree["VALUE"][0] . "[" . $tree["PNUM"] . "]"); else return($tree["VALUE"][0]); break; case "OPS": case "OPK": if(isset($tree["PNUM"])) return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth+1) . " " . $tree["VALUE"] . "[" . $tree["PNUM"] . "] " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth+1) . ")"); else return("(" . $this->astext_ambigu($tree["LB"], $ambiguites, $mouseCallback, $depth+1) . " " . $tree["VALUE"] . " " . $this->astext_ambigu($tree["RB"], $ambiguites, $mouseCallback, $depth+1) . 
")"); break; } } function get_ambigu(&$tree, $mouseCallback="void", $depth=0) { if(!$tree) return(""); unset($tree["DEPTH"]); if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { $this->get_ambigu($tree["LB"], $mouseCallback, $depth+1); $this->get_ambigu($tree["RB"], $mouseCallback, $depth+1); } else { } if($depth==0) { $t_ambiguites = array(); $r = ($this->astext_ambigu($tree, $t_ambiguites, $mouseCallback)); $t_ambiguites["query"] = $r; return($t_ambiguites); } } function set_default(&$tree, &$emptyw, $depth=0) { if(!$tree) return(true); if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { if($tree["CLASS"]=="OPS") { if(!$this->set_default($tree["LB"], $emptyw, $depth+1)) return(false); if(!$this->set_default($tree["RB"], $emptyw, $depth+1)) return(false); } else // OPK ! { // jy 20041223 : ne pas appliquer d'op. par def. derriere un op arith. // ex : "d < 1/2/2003" : grouper la liste "1","2","2004" en "mot" unique if(!$tree["LB"] || ($tree["LB"]["CLASS"]!="SIMPLE" && $tree["LB"]["CLASS"]!="QSIMPLE") || (is_array($tree["LB"]["VALUE"]) && count($tree["LB"]["VALUE"])!=1) ) { // un op. arith. doit étre précédé d'un seul nom de champ if($this->errmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, un nom de champs est attendu avant l operateur %s'),$tree["VALUE"]); return(false); } if(!$tree["RB"] || ($tree["RB"]["CLASS"]!="SIMPLE" && $tree["RB"]["CLASS"]!="QSIMPLE")) { // un op. arith. doit étre suivi d'une valeur if($this->errmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendue apres l operateur %s'),$tree["VALUE"]); return(false); } if(is_array($tree["RB"]["VALUE"])) { $lw = ""; foreach($tree["RB"]["VALUE"] as $w) $lw .= ($lw==""?"":" ") . 
$w; $tree["RB"]["VALUE"] = $lw; } } /** gestion des branches null * a revoir car ca ppete pas d'erreur mais corrige automatiquement ****/ if(!isset($tree["RB"])) $tree=$tree["LB"]; else if(!isset($tree["LB"])) $tree=$tree["RB"]; } else { if(($tree["CLASS"]=="SIMPLE" || $tree["CLASS"]=="QSIMPLE")) { if(is_array($tree["VALUE"])) { $treetmp = null; $pnum = 0; for( $i=0; $i$tree["CLASS"], "NODETYPE"=>$tree["NODETYPE"], "VALUE"=>$tree["VALUE"][$i], "PNUM"=>$tree["PNUM"], "DEPTH"=>$tree["DEPTH"] ); $pnum = 0; } else { $dop = $tree["CLASS"]=="QSIMPLE" ? $this->quoted_defaultop : $this->defaultop ; $treetmp = array("CLASS"=>"OPS", "VALUE"=>$dop["VALUE"], "NODETYPE"=>$dop["NODETYPE"], "PNUM"=>$pnum, // peut-être écrasé par defaultop "DEPTH"=>$depth, "LB"=>$treetmp, "RB"=>array("CLASS"=>$tree["CLASS"], "NODETYPE"=>$tree["NODETYPE"], "VALUE"=>$tree["VALUE"][$i], "PNUM"=>$tree["PNUM"], "DEPTH"=>$tree["DEPTH"] ) ); if(array_key_exists("PNUM", $dop)) $treetmp["PNUM"] = $dop["PNUM"]; $pnum = 0; } } } $tree = $treetmp; } } } return(true); } function factor_or(&$tree) { do $n = $this->factor_or2($tree); while($n>0); } function factor_or2(&$tree, $depth=0) { // printf("
factor_or depth=%s sur
", $depth); // var_dump($tree); $nmodif=0; if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { if($tree["NODETYPE"]==PHRASEA_OP_OR && ($tree["LB"]["CLASS"]=="SIMPLE" || $tree["LB"]["CLASS"]=="QSIMPLE") && ($tree["RB"]["CLASS"]=="SIMPLE" || $tree["RB"]["CLASS"]=="QSIMPLE")) { $tree["CLASS"]="SIMPLE"; $tree["NODETYPE"]=PHRASEA_KEYLIST; $tree["VALUE"] = is_array($tree["LB"]["VALUE"]) ? $tree["LB"]["VALUE"] : array($tree["LB"]["VALUE"]); if(is_array($tree["RB"]["VALUE"])) { foreach($tree["RB"]["VALUE"] as $v) $tree["VALUE"][] = $v; } else $tree["VALUE"][] = $tree["RB"]["VALUE"]; unset($tree["LB"]); unset($tree["RB"]); // unset($tree["NODETYPE"]); unset($tree["PNUM"]); $nmodif++; // printf("
donne
"); // var_dump($tree); } else { $nmodif += $this->factor_or2($tree["LB"], $depth+1); $nmodif += $this->factor_or2($tree["RB"], $depth+1); } } // printf("
return %s
", $nmodif); return($nmodif); }

	/**
	 * Converts the values compared to 'Date' fields into the numeric date format.
	 *
	 * For every OPK node whose two branches are simple terms, the left term is
	 * taken as a field name and looked up (case-insensitively) among the children
	 * of /record/description in $xmlstruct. The "type" attribute of the field is
	 * handed to changeNodeEquals() for the '=' operator, to setNumValue2() for the
	 * other operators. Both branches are then walked recursively.
	 *
	 * @param array  $tree      query tree, modified in place
	 * @param string $xmlstruct XML structure of the base
	 * @param int    $depth     recursion depth
	 */
	function setNumValue(&$tree,$xmlstruct, $depth=0)
	{
		if($tree["CLASS"]=="OPK"
			&& isset($tree["RB"])
			&& ($tree["RB"]["CLASS"]=="SIMPLE" || $tree["RB"]["CLASS"]=="QSIMPLE")
			&& ($tree["LB"]["CLASS"]=="SIMPLE" || $tree["LB"]["CLASS"]=="QSIMPLE") )
		{
			$sxe = simplexml_load_string($xmlstruct);
			$z = $sxe ? $sxe->xpath('/record/description') : false;
			if($z && is_array($z))
			{
				foreach($z[0] as $ki => $vi)
				{
					// read again at each turn: changeNodeEquals() may have replaced $tree
					$champ = is_array($tree["LB"]["VALUE"]) ? $tree["LB"]["VALUE"][0] : $tree["LB"]["VALUE"];
					if(!$champ || strtoupper($ki)!=strtoupper($champ))
						continue;

					foreach($vi->attributes() as $propname => $val)
					{
						if(strtoupper($propname)!=strtoupper("type"))
							continue;
						if($tree["NODETYPE"]==PHRASEA_OP_EQUAL)
							$this->changeNodeEquals($tree,$val);		// special case of "=" on a date
						else
							$this->setNumValue2($tree["RB"],$val);
					}
				}
			}
		}

		if(isset($tree["LB"]))
			$this->setNumValue($tree["LB"],$xmlstruct, $depth+1);
		if(isset($tree["RB"]))
			$this->setNumValue($tree["RB"],$xmlstruct, $depth+1);
	}

	/**
	 * Replaces a 'field = value' node by a date range when the field type is "Date"
	 * (case-insensitive); other types leave the node untouched.
	 *
	 * @param array $branch the '=' node, replaced in place
	 * @param mixed $type   value of the "type" attribute of the field
	 */
	function changeNodeEquals(&$branch,$type)
	{
		if(strtoupper($type)==strtoupper("Date"))
			$branch = $this->changeNodeEquals2($branch);
	}

	/**
	 * Builds ((field >= min) et (field <= max)) from a 'field = date' node, where
	 * min and max are isoDate() of the value with $max false and true.
	 *
	 * @param array $oneBranch the '=' node ("LB" = field, "RB" = value)
	 * @return array the replacement node
	 */
	function changeNodeEquals2($oneBranch)
	{
		$field = $oneBranch["LB"];
		$value = $oneBranch["RB"]["VALUE"];

		// left branch: field >= lowest date
		$newTreeLB = array("CLASS"=>"OPK",
						   "VALUE"=>">=",
						   "NODETYPE"=>PHRASEA_OP_GEQT,
						   "PNUM"=>NULL,
						   "DEPTH"=>0,
						   "LB"=>$field,
						   "RB"=>array("CLASS"=>"SIMPLE",
									   "VALUE"=>$this->isoDate($value,false),
									   "NODETYPE"=>PHRASEA_KEYLIST,
									   "PNUM"=>NULL,
									   "DEPTH"=>0)
						  );

		// right branch: field <= highest date
		$newTreeRB = array("CLASS"=>"OPK",
						   "VALUE"=>"<=",
						   "NODETYPE"=>PHRASEA_OP_LEQT,
						   "PNUM"=>NULL,
						   "DEPTH"=>0,
						   "LB"=>$field,
						   "RB"=>array("CLASS"=>"SIMPLE",
									   "VALUE"=>$this->isoDate($value,true),
									   "NODETYPE"=>PHRASEA_KEYLIST,
									   "PNUM"=>NULL,
									   "DEPTH"=>0)
						  );

		return array("CLASS"=>"OPS",
					 "VALUE"=>"et",
					 "NODETYPE"=>PHRASEA_OP_AND,
					 "PNUM"=>NULL,
					 "DEPTH"=>0,
					 "LB"=>$newTreeLB,
					 "RB"=>$newTreeRB
					);
	}

	/**
	 * Rewrites the value of a term with isoDate() when the field type is "Date"
	 * (case-insensitive).
	 *
	 * @param array $branch the value node, modified in place
	 * @param mixed $type   value of the "type" attribute of the field
	 */
	function setNumValue2(&$branch,$type)
	{
		if(strtoupper($type)==strtoupper("Date"))
			$branch["VALUE"] = $this->isoDate($branch["VALUE"]);
	}

	/**
	 * Converts a date typed by the user into a YYYYMMDDHHMMSS string.
	 *
	 * Accepted inputs (a list of tokens, or a string whose tokens are separated by
	 * spaces, slashes or dashes):
	 *   3 tokens : YYYY MM DD, DD MM YYYY or DD MM YY
	 *   2 tokens : YYYY MM, MM YYYY or MM YY
	 *   1 token  : YYYYMMDDHHMMSS, YYYYMMDD, YYYYMM, YYYY or YY
	 * Two-digit years below 20 are prefixed with "20", the others with "19".
	 * One-digit days, months and years are left-padded with "0" so that the
	 * fields keep their fixed width.
	 * A component that was not given is filled with "00", or with "99" when $max
	 * is true, so the result can serve as the lower / upper bound of a range.
	 * Any other input yields 1900-01-01 with the time part filled the same way.
	 *
	 * @param string|array $onedate the date, as a string or as a list of tokens
	 * @param bool         $max     true to fill the missing components with "99"
	 * @return string
	 */
	function isoDate($onedate,$max=false)
	{
		$fill = $max ? "99" : "00";

		$v_y = "1900";
		$v_m = "01";
		$v_d = "01";
		$v_h = $v_minutes = $v_s = $fill;

		if(is_array($onedate))
			$tmp = array_values($onedate);
		else
			$tmp = preg_split('/[\s\/\-]+/', trim($onedate), -1, PREG_SPLIT_NO_EMPTY);

		switch(count($tmp))
		{
			case 3 :	// complete date
				if(strlen($tmp[0])==4)
				{
					// year first: YYYY MM DD
					$v_y = $tmp[0];
					$v_m = $tmp[1];
					$v_d = $tmp[2];
				}
				elseif(strlen($tmp[2])==4)
				{
					// year last: DD MM YYYY
					$v_y = $tmp[2];
					$v_m = $tmp[1];
					$v_d = $tmp[0];
				}
				else
				{
					// short year: DD MM YY
					$yy  = str_pad($tmp[2], 2, "0", STR_PAD_LEFT);
					$v_y = ((int)$yy < 20 ? "20" : "19") . $yy;
					$v_m = $tmp[1];
					$v_d = $tmp[0];
				}
				$v_m = str_pad($v_m, 2, "0", STR_PAD_LEFT);
				$v_d = str_pad($v_d, 2, "0", STR_PAD_LEFT);
				break;

			case 2 :	// month and year only
				if(strlen($tmp[0])==4)
				{
					// YYYY MM
					$v_y = $tmp[0];
					$v_m = $tmp[1];
				}
				elseif(strlen($tmp[1])==4)
				{
					// MM YYYY
					$v_y = $tmp[1];
					$v_m = $tmp[0];
				}
				else
				{
					// MM YY
					$yy  = str_pad($tmp[1], 2, "0", STR_PAD_LEFT);
					$v_y = ((int)$yy < 20 ? "20" : "19") . $yy;
					$v_m = $tmp[0];
				}
				$v_m = str_pad($v_m, 2, "0", STR_PAD_LEFT);
				$v_d = $fill;
				break;

			case 1 :	// a year alone, or a date written without separators
				switch(strlen($tmp[0]))
				{
					case 14 :	// YYYYMMDDHHMMSS
						$v_y = substr($tmp[0],0,4);
						$v_m = substr($tmp[0],4,2);
						$v_d = substr($tmp[0],6,2);
						$v_h = substr($tmp[0],8,2);
						$v_minutes = substr($tmp[0],10,2);
						$v_s = substr($tmp[0],12,2);
						break;
					case 8 :	// YYYYMMDD
						$v_y = substr($tmp[0],0,4);
						$v_m = substr($tmp[0],4,2);
						$v_d = substr($tmp[0],6,2);
						break;
					case 6 :	// YYYYMM
						$v_y = substr($tmp[0],0,4);
						$v_m = substr($tmp[0],4,2);
						$v_d = $fill;
						break;
					case 4 :	// YYYY
						$v_y = $tmp[0];
						$v_m = $fill;
						$v_d = $fill;
						break;
					case 2 :	// YY
						$v_y = ((int)$tmp[0] < 20 ? "20" : "19") . $tmp[0];
						$v_m = $fill;
						$v_d = $fill;
						break;
				}
				break;
		}

		return("" . $v_y . $v_m . $v_d . $v_h . $v_minutes . $v_s);
	}

function distrib_in(&$tree, $depth=0) { $opdistrib = array(PHRASEA_OP_AND, PHRASEA_OP_OR, PHRASEA_OP_EXCEPT, PHRASEA_OP_NEAR, PHRASEA_OP_BEFORE, PHRASEA_OP_AFTER); // ces opérateurs sont 'distribuables' autour d'un 'IN' // printf("
distrib_in depth=%s sur
", $depth); // var_dump($tree); if($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { if($tree["NODETYPE"]==PHRASEA_OP_IN || $tree["CLASS"]=="OPK") { if($tree["LB"]["CLASS"]=="OPK") { // on a un truc du genre '(t1 = t2) dans t3' // ... on ne fait rien } if($tree["LB"]["CLASS"]=="OPS" && in_array($tree["LB"]["NODETYPE"], $opdistrib)) { // on a un truc du genre '(t1 op t2) {dans|=} t3', on distribue le dans é t1 et t2 // ==> ((t1 dans t3) op (t2 dans t3)) $m_v = $tree["VALUE"]; $m_t = $tree["CLASS"]; $m_o = $tree["NODETYPE"]; $m_n = $tree["PNUM"]; $tree["CLASS"] = $tree["LB"]["CLASS"]; $tree["NODETYPE"] = $tree["LB"]["NODETYPE"]; $tree["VALUE"] = $tree["LB"]["VALUE"]; $tree["PNUM"] = $tree["LB"]["PNUM"]; $tree["LB"]["CLASS"] = $m_t; $tree["LB"]["NODETYPE"] = $m_o; $tree["LB"]["VALUE"] = $m_v; $tree["LB"]["PNUM"] = $m_n; $tree["RB"] = array("CLASS"=>$m_t, "NODETYPE"=>$m_o, "VALUE"=>$m_v, "PNUM"=>$m_n, "LB"=>$tree["LB"]["RB"], "RB"=>$tree["RB"]); $tree["LB"]["RB"] = $tree["RB"]["RB"]; // var_dump($tree); // return; } if($tree["RB"]["CLASS"]=="OPS" && in_array($tree["RB"]["NODETYPE"], $opdistrib)) { // on a un truc du genre 't1 {dans|=} (t2 op t3)', on distribue le dans é t2 et t3 // ==> ((t1 dans t2) ou (t1 dans t3)) $m_v = $tree["VALUE"]; $m_t = $tree["CLASS"]; $m_o = $tree["NODETYPE"]; $m_n = $tree["PNUM"]; $tree["CLASS"] = $tree["RB"]["CLASS"]; $tree["NODETYPE"] = $tree["RB"]["NODETYPE"]; $tree["VALUE"] = $tree["RB"]["VALUE"]; $tree["PNUM"] = $tree["RB"]["PNUM"]; $tree["RB"]["CLASS"] = $m_t; $tree["RB"]["NODETYPE"] = $m_o; $tree["RB"]["VALUE"] = $m_v; $tree["RB"]["PNUM"] = $m_n; $tree["LB"] = array("CLASS"=>$m_t, "NODETYPE"=>$m_o, "VALUE"=>$m_v, "PNUM"=>$m_n, "LB"=>$tree["LB"], "RB"=>$tree["RB"]["LB"]); $tree["RB"]["LB"] = $tree["LB"]["LB"]; } } $this->distrib_in($tree["LB"], $depth+1); $this->distrib_in($tree["RB"], $depth+1); } } function makequery($tree) { $a = array($tree["NODETYPE"]); switch($tree["CLASS"]) { case "PHRASEA_KW_LAST": if($tree["PNUM"] !== NULL) 
$a[] = $tree["PNUM"]; break; case "PHRASEA_KW_ALL": break; case "SIMPLE": case "QSIMPLE": // pas de tid, c'est un terme normal if(is_array($tree["VALUE"])) { foreach($tree["VALUE"] as $k=>$v) $a[] = $v; } else { $a[] = $tree["VALUE"]; } break; case "OPK": if($tree["LB"] !== NULL) $a[] = $this->makequery($tree["LB"]); if($tree["RB"] !== NULL) $a[] = $this->makequery($tree["RB"]); break; case "OPS": if($tree["PNUM"] !== NULL) $a[] = intval($tree["PNUM"]); if($tree["LB"] !== NULL) $a[] = $this->makequery($tree["LB"]); if($tree["RB"] !== NULL) $a[] = $this->makequery($tree["RB"]); break; } return($a); } function maketree($depth, $inquote = false) { // printf("\n\n"); $tree = null; while($t = $this->nexttoken($inquote)) { if($this->debug) printf("got token %s of class %s\n", $t["VALUE"], $t["CLASS"]); switch($t["CLASS"]) { case "TOK_RP": if($inquote) { // quand on est entre guillements les tokens perdent leur signification $tree = $this->addtotree($tree, $t, $depth, $inquote); if(!$tree) return(null); } else { if($depth<=0) // ')' : retour de récursivité { if($this->errmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= _('qparser:: erreur : trop de parentheses fermantes'); return(null); } return($tree); } break; case "TOK_LP": if($inquote) { // quand on est entre guillements les tokens perdent leur signification $tree = $this->addtotree($tree, $t, $depth, $inquote); if(!$tree) return(null); } else // '(' : appel récursif { if(!$tree) $tree = $this->maketree($depth+1); else { if(($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") && $tree["RB"]==null) { $tree["RB"] = $this->maketree($depth+1); if(!$tree["RB"]) $tree = null; } else { // ici on applique l'opérateur par défaut $tree = array("CLASS"=>"OPS", "VALUE"=>$this->defaultop["VALUE"], "NODETYPE"=>$this->defaultop["NODETYPE"], "PNUM"=>$this->defaultop["PNUM"], "DEPTH"=>$depth, "LB"=>$tree, "RB"=>$this->maketree($depth+1) ); } } if(!$tree) return(null); } break; case "TOK_VOID": // ce token est entre guillemets : 
on le saute break; case "TOK_QUOTE": // une expr entre guillemets est 'comme entre parenthéses', // sinon "a b" OU "x y" -> (((a B0 b) OU x) B0 y) au lieu de // "a b" OU "x y" -> ((a B0 b) OU (x B0 y)) if($inquote) { if($this->debug) { print("CLOSING QUOTE!\n"); } // fermeture des guillemets -> retour de récursivité if($depth<=0) // ')' : retour de récursivité { print("\nguillemets fermants en trop
"); return(null); } return($tree); } else { if($this->debug) { print("OPENING QUOTE!
"); } // ouverture des guillemets -> récursivité if(!$tree) $tree = $this->maketree($depth+1, true); else { if(($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") && $tree["RB"]==null) { $tree["RB"] = $this->maketree($depth+1, true); if(!$tree["RB"]) $tree = null; } else { // ici on applique l'opérateur par défaut $tree = array("CLASS"=>"OPS", "VALUE"=>$this->defaultop["VALUE"], "NODETYPE"=>$this->defaultop["NODETYPE"], "PNUM"=>$this->defaultop["PNUM"], "DEPTH"=>$depth, "LB"=>$tree, "RB"=>$this->maketree($depth+1, true) ); } } if(!$tree) return(null); } break; default: $tree = $this->addtotree($tree, $t, $depth, $inquote); if($this->debug) { print("---- après addtotree ----\n"); var_dump($tree); print("-------------------------\n"); } if(!$tree) return(null); break; } } if(($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") && $tree["RB"]==null) { if($this->errmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, une valeur est attendu apres %s'),$tree["VALUE"]); $tree = $tree["LB"]; } return($tree); } function addtotree($tree, $t, $depth, $inquote) { if($this->debug) { printf("addtotree({tree}, \$t[CLASS]='%s', \$t[VALUE]='%s', \$depth=%d, inquote=%s)\n", $t["CLASS"], $t["VALUE"], $depth, $inquote?"true":"false"); print("---- avant addtotree ----\n"); var_dump($tree); print("-------------------------\n"); } if(!$t) return($tree); switch($t["CLASS"]) { case "TOK_CONTEXT": // if($this->debug) // { // printf("addtotree({tree}, \$t='%s', \$depth=%d, inquote=%s)\n", $t["VALUE"], $depth, $inquote?"true":"false"); // var_dump($tree); // } if($tree["CLASS"]=="SIMPLE" || $tree["CLASS"]=="QSIMPLE") { // un [xxx] suit un terme : il introduit un contexte $tree["CONTEXT"] = $t["VALUE"]; } elseif($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") { if(!isset($tree["RB"]) || !$tree["RB"]) { // un [xxx] peut suivre un opérateur, c'est un paramétre normalement numérique $tree["PNUM"] = $t["VALUE"]; } else { // [xxx] suit un terme déjé en 
branche droite ? (ex: a ou b[k]) if($tree["RB"]["CLASS"]=="SIMPLE" || $tree["RB"]["CLASS"]=="QSIMPLE") $tree["RB"]["CONTEXT"] = $t["VALUE"]; else { if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"] ); return(null); } } } else { if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf("le contexte [%s] ne peut suivre qu'un terme ou un opérateur
", $t["VALUE"] ); return(null); } return($tree); break; case "TOK_CMP": // < > <= >= <> = : sont des opérateurs de comparaison if(!$tree) { // printf("\nUne question ne peut commencer par '" . $t["VALUE"] . "'
"); if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'),$t["VALUE"]); return(null); } if(($tree["CLASS"]=="OPS"|| $tree["CLASS"]=="OPK") && $tree["RB"]==null) { // printf("'" . $t["VALUE"] . "' ne peut suivre un opérateur
"); if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, ne peut suivre un operateur : %s'),$t["VALUE"]); return(null); } return(array("CLASS"=>"OPK", "VALUE"=>$t["VALUE"], "NODETYPE"=>$this->opk[$t["VALUE"]]["NODETYPE"], "PNUM"=>null, "DEPTH"=>$depth, "LB"=>$tree, "RB"=>null)); break; case "TOK_WORD": if($t["CLASS"]=="TOK_WORD" && isset($this->ops[$t["VALUE"]]) && !$inquote) { // ce mot est un opérateur phrasea if(!$tree) { // printf("\n581 : Une question ne peut commencer par un opérateur
"); if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf(_('qparser::erreur : une question ne peut commencer par %s'),$t["VALUE"]); return(null); } if(($tree["CLASS"]=="OPS" || $tree["CLASS"]=="OPK") && $tree["RB"]==null) { // printf("\n586 : Un opérateur ne peut suivre un opérateur
"); if($this->errmsg!="") $this->errmsg .= "\\n"; $this->errmsg .= sprintf(_('qparser::Formulation incorrecte, %s ne peut suivre un operateur'),$t["VALUE"]); return(null); } $pnum = null; if($this->ops[$t["VALUE"]]["CANNUM"]) { // cet opérateur peut étre suivi d'un nombre ('near', 'before', 'after') if($tn = $this->nexttoken()) { if($tn["CLASS"]=="TOK_WORD" && is_numeric($tn["VALUE"])) $pnum = (int)$tn["VALUE"]; else $this->ungettoken($tn["VALUE"]); } } return(array("CLASS"=>"OPS", "VALUE"=>$t["VALUE"], "NODETYPE"=>$this->ops[$t["VALUE"]]["NODETYPE"], "PNUM"=>$pnum, "DEPTH"=>$depth, "LB"=>$tree, "RB"=>null)); } else { // ce mot n'est pas un opérateur $pnum = null; $nodetype = PHRASEA_KEYLIST; if($t["CLASS"]=="TOK_WORD" && isset($this->spw[$t["VALUE"]]) && !$inquote) { // mais c'est un mot 'spécial' de phrasea ('last', 'all') $type = $this->spw[$t["VALUE"]]["CLASS"]; $nodetype = $this->spw[$t["VALUE"]]["NODETYPE"]; if($this->spw[$t["VALUE"]]["CANNUM"]) { // 'last' peut étre suivi d'un nombre if($tn = $this->nexttoken()) { if($tn["CLASS"]=="TOK_WORD" && is_numeric($tn["VALUE"])) $pnum = (int)$tn["VALUE"]; else $this->ungettoken($tn["VALUE"]); } } } else { //printf("sdfsdfsdfsd
"); $type = $inquote ? "QSIMPLE" : "SIMPLE"; } return($this->addsimple($t, $type, $nodetype, $pnum, $tree, $depth)); } break; } } function addsimple($t, $type, $nodetype, $pnum, $tree, $depth) { $nok = 0; $w = $t["VALUE"]; if($w != "?" && $w != "*") // on laisse passer les 'isolés' pour les traiter plus tard comme des mots vides { for($i=0; $ierrmsg!="") $this->errmsg .= sprintf("\\n"); $this->errmsg .= _('qparser:: Formulation incorrecte, necessite plus de caractere : ') ."
". GV_min_letters_truncation; return(null); } // $nok = 0; } else $nok++; } } if(!$tree) { return(array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth)); } switch($tree["CLASS"]) { case "SIMPLE": case "QSIMPLE": if($type == "SIMPLE" || $type == "QSIMPLE") $tree["VALUE"][] = $t["VALUE"]; else { $tree = array("CLASS"=>"OPS", "VALUE"=>"et", "NODETYPE"=>PHRASEA_OP_AND, "PNUM"=>null, "DEPTH"=>$depth, "LB"=>$tree, "RB"=>array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth)); } return($tree); case "OPS": case "OPK": if($tree["RB"]==null) { $tree["RB"] = array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth); return($tree); } else { if(($tree["RB"]["CLASS"] == "SIMPLE" || $tree["RB"]["CLASS"] == "QSIMPLE") && $tree["RB"]["DEPTH"] == $depth) { $tree["RB"]["VALUE"][] = $t["VALUE"]; return($tree); } if(($tree["RB"]["CLASS"] == "PHRASEA_KW_LAST" || $tree["RB"]["CLASS"] == "PHRASEA_KW_ALL") && $tree["RB"]["DEPTH"] == $depth) { $tree["RB"] = array("CLASS"=>"OPS", "VALUE"=>"et", "NODETYPE"=>PHRASEA_OP_AND, "PNUM"=>null, "DEPTH"=>$depth, "LB"=>$tree["RB"], "RB"=>array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth)); return($tree); } return(array("CLASS"=>"OPS", "VALUE"=>$this->defaultop["VALUE"], "NODETYPE"=>$this->defaultop["NODETYPE"], "PNUM"=>$this->defaultop["PNUM"], "DEPTH"=>$depth, "LB"=>$tree, "RB"=>array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth) )); } case "PHRASEA_KW_LAST": case "PHRASEA_KW_ALL": return(array("CLASS"=>"OPS", "VALUE"=>"et", "NODETYPE"=>PHRASEA_OP_AND, "PNUM"=>null, "DEPTH"=>$depth, "LB"=>$tree, "RB"=>array("CLASS"=>$type, "NODETYPE"=>$nodetype, "VALUE"=>array($t["VALUE"]), "PNUM"=>$pnum, "DEPTH"=>$depth))); } } function ungettoken($s) { $this->phq = $s . " " . 
$this->phq; } function nexttoken($inquote=false) { global $endCharacters_utf8; // caractères terminaux dans 'lownodiacritics_utf8.php' global $t_LowNoDiacritics; // printf("\n", $this->phq); if($this->phq == "") return(null); switch($c = substr($this->phq, 0, 1)) { case "<": case ">": if($inquote) { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); } $c2 = $c . substr($this->phq, 1, 1); if($c2=="<=" || $c2==">=" || $c2=="<>") { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 2, 99999, 'UTF-8'), 'UTF-8'); $c = $c2; } else { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); } return(array("CLASS"=>"TOK_CMP", "VALUE"=>$c)); break; case "=": if($inquote) { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); } $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_CMP", "VALUE"=>"=")); break; case ":": if($inquote) { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); } $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_CMP", "VALUE"=>":")); break; case "(": if($inquote) { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); } $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_LP", "VALUE"=>"(")); break; case ")": if($inquote) { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); } $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_RP", "VALUE"=>")")); break; case "[": // if($inquote) // { // $this->phq = ltrim(substr($this->phq, 
1)); // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); // } // un '[' introduit un contexte qu'on lit jusqu'au ']' $closeb = mb_strpos($this->phq, "]", 1, 'UTF-8'); if($closeb !== false) { $context = $this->mb_trim(mb_substr($this->phq, 1, $closeb-1, 'UTF-8'), 'UTF-8'); $this->phq = $this->mb_ltrim(mb_substr($this->phq, $closeb+1, 99999, 'UTF-8'), 'UTF-8'); } else { $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); $this->phq = ""; } $context = noaccent_utf8($context, PARSED); return(array("CLASS"=>"TOK_CONTEXT", "VALUE"=>$context)); break; /* case "]": // if($inquote) // { // $this->phq = ltrim(substr($this->phq, 1)); // return(array("CLASS"=>"TOK_VOID", "VALUE"=>$c)); // } $this->phq = ltrim(substr($this->phq, 1)); return(array("CLASS"=>"TOK_RB", "VALUE"=>"]")); break; */ case "\"": $this->phq = $this->mb_ltrim(mb_substr($this->phq, 1, 99999, 'UTF-8'), 'UTF-8'); return(array("CLASS"=>"TOK_QUOTE", "VALUE"=>"\"")); break; default: $l = mb_strlen($this->phq, 'UTF-8'); $t = ""; $c_utf8 = ""; for($i=0; $i<$l; $i++) { if(mb_strpos($endCharacters_utf8, ($c_utf8 = mb_substr($this->phq, $i, 1, 'UTF-8')))===false) { // $c = mb_strtolower($c); // $t .= isset($this->noaccent[$c]) ? $this->noaccent[$c] : $c; // printf("// '%s' ", $c); $t .= isset($t_LowNoDiacritics["cmap"][$c_utf8]) ? 
$t_LowNoDiacritics["cmap"][$c_utf8] : $c_utf8; // printf(" : '%s' \n", $c); } else break; } if($c_utf8=="(" || $c_utf8==")" || $c_utf8=="[" || $c_utf8=="]" || $c_utf8=="=" || $c_utf8==":" || $c_utf8=="<" || $c_utf8==">" || $c_utf8=="\"") // ces caractéres sont des délimiteurs avec un sens, il faut les garder $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i, 99999, 'UTF-8'), 'UTF-8'); else $this->phq = $this->mb_ltrim(mb_substr($this->phq, $i+1, 99999, 'UTF-8'), 'UTF-8'); // le délimiteur était une simple ponctuation, on le saute if($t != "") return(array("CLASS"=>"TOK_WORD", "VALUE"=>$t)); else return(array("CLASS"=>"TOK_VOID", "VALUE"=>$t)); break; } } } function txt2sql($t) { return( str_replace(array('\\', '\''), array('\\\\', '\'\''), $t) ); }