From 1fbddfec3e77ec02c52b8ec015a6f5408db273ef Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Wed, 13 Aug 2014 14:22:06 +0200 Subject: [PATCH 1/3] PoC query parser With Jison PHP port --- build_query_parser.sh | 4 + grammar/query.jison | 105 ++ grammar/test_query | 1 + lib/Alchemy/Phrasea/Controller/Prod/Query.php | 1 + .../Elastic/ElasticSearchEngine.php | 4 +- .../SearchEngine/Elastic/QueryParser.php | 995 ++++++++++++++++++ www/skins/prod/jquery.main-prod.js | 8 + 7 files changed, 1117 insertions(+), 1 deletion(-) create mode 100644 build_query_parser.sh create mode 100644 grammar/query.jison create mode 100644 grammar/test_query create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php diff --git a/build_query_parser.sh b/build_query_parser.sh new file mode 100644 index 0000000000..8d6a673873 --- /dev/null +++ b/build_query_parser.sh @@ -0,0 +1,4 @@ +#!/bin/sh +cd grammar +node jison/ports/php/php.js query.jison +mv QueryParser.php ../lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php \ No newline at end of file diff --git a/grammar/query.jison b/grammar/query.jison new file mode 100644 index 0000000000..eb80ebf4e2 --- /dev/null +++ b/grammar/query.jison @@ -0,0 +1,105 @@ +/* description: Parses Phraseanet search queries. 
*/ + +/* lexical grammar */ +%lex + +/* lexical states: "literal" is an exclusive state, active between an opening and a closing double quote */ +%x literal + +/* begin lexing */ +%% + +\s+ /* skip whitespace */ +"AND" return 'AND' +"and" return 'AND' +"et" return 'AND' +"OR" return 'OR' +"or" return 'OR' +"ou" return 'OR' +"IN" return 'IN' +"in" return 'IN' +"dans" return 'IN' +"(" return '(' +")" return ')' +'"' { + //js + this.begin('literal'); + //php $this->begin('literal'); + } +<literal>'"' { + //js + this.popState(); + //php $this->popState(); + } +<literal>([^"])* return 'LITERAL' +\w+ return 'WORD' +<<EOF>> return 'EOF' + +/lex + + +/* operator associations and precedence */ + +%left 'AND' 'OR' +%left 'IN' + +%start query + + +%% /* language grammar */ + + +query + : expression EOF { + //js + console.log('[QUERY]', $$); + return $$; + //php return $$; + } + ; + +expression + : expression AND expression { + //js + $$ = '('+$1+' AND '+$3+')'; + console.log('[AND]', $$); + //php $$ = sprintf('(%s AND %s)', $1->text, $3->text); + } + | expression OR expression { + //js + $$ = '('+$1+' OR '+$3+')'; + console.log('[OR]', $$); + //php $$ = sprintf('(%s OR %s)', $1->text, $3->text); + } + | expression IN location { + //js + $$ = '('+$1+' IN '+$3+')'; + console.log('[IN]', $$); + //php $$ = sprintf('(%s IN %s)', $1->text, $3->text); + } + | '(' expression ')' { + //js + $$ = $2; + //php $$ = $2; + } + | text { + //js + $$ = '"'+$1+'"'; + console.log('[TEXT]', $$); + //php $$ = sprintf('"%s"', $1->text); + } + ; + +location + : WORD + ; + +text + : WORD + | LITERAL + ; + + +//option namespace:Alchemy\Phrasea\SearchEngine\Elastic +//option class:QueryParser +//option fileName:QueryParser.php diff --git a/grammar/test_query b/grammar/test_query new file mode 100644 index 0000000000..ee0c14e874 --- /dev/null +++ b/grammar/test_query @@ -0,0 +1 @@ +"chien blanc" ou chat and costume in title diff --git a/lib/Alchemy/Phrasea/Controller/Prod/Query.php b/lib/Alchemy/Phrasea/Controller/Prod/Query.php index b973b411db..6e74685840 --- 
a/lib/Alchemy/Phrasea/Controller/Prod/Query.php +++ b/lib/Alchemy/Phrasea/Controller/Prod/Query.php @@ -191,6 +191,7 @@ class Query implements ControllerProviderInterface ); $json['query'] = $query; + $json['parsed_query'] = $result->getQuery(); $json['phrasea_props'] = $proposals; $json['total_answers'] = (int) $result->getAvailable(); $json['next_page'] = ($page < $npages && $result->getAvailable() > 0) ? ($page + 1) : false; diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index e4d1bd6c4d..09f0aa5577 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -274,6 +274,8 @@ class ElasticSearchEngine implements SearchEngineInterface */ public function query($query, $offset, $perPage, SearchEngineOptions $options = null) { + $parser = new QueryParser(); + $query = 'all' !== strtolower($query) ? $query : ''; $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions()); $params['from'] = $offset; @@ -291,7 +293,7 @@ class ElasticSearchEngine implements SearchEngineInterface $results[] = new \record_adapter($this->app, $databoxId, $recordId, $n++); } - return new SearchEngineResult($results, $query, $res['took'], $offset, $res['hits']['total'], $res['hits']['total'], null, null, $suggestions, [], $this->indexName); + return new SearchEngineResult($results, $parser->parse($query), $res['took'], $offset, $res['hits']['total'], $res['hits']['total'], null, null, $suggestions, [], $this->indexName); } /** diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php new file mode 100644 index 0000000000..d2356fc475 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php @@ -0,0 +1,995 @@ +symbols[0] = $symbol0; + $this->symbols["accept"] = $symbol0; + $this->symbols[1] = 
$symbol1; + $this->symbols["end"] = $symbol1; + $this->symbols[2] = $symbol2; + $this->symbols["error"] = $symbol2; + $this->symbols[3] = $symbol3; + $this->symbols["query"] = $symbol3; + $this->symbols[4] = $symbol4; + $this->symbols["expression"] = $symbol4; + $this->symbols[5] = $symbol5; + $this->symbols["EOF"] = $symbol5; + $this->symbols[6] = $symbol6; + $this->symbols["AND"] = $symbol6; + $this->symbols[7] = $symbol7; + $this->symbols["OR"] = $symbol7; + $this->symbols[8] = $symbol8; + $this->symbols["IN"] = $symbol8; + $this->symbols[9] = $symbol9; + $this->symbols["location"] = $symbol9; + $this->symbols[10] = $symbol10; + $this->symbols["("] = $symbol10; + $this->symbols[11] = $symbol11; + $this->symbols[")"] = $symbol11; + $this->symbols[12] = $symbol12; + $this->symbols["text"] = $symbol12; + $this->symbols[13] = $symbol13; + $this->symbols["WORD"] = $symbol13; + $this->symbols[14] = $symbol14; + $this->symbols["LITERAL"] = $symbol14; + + $this->terminals = array( + 2=>&$symbol2, + 5=>&$symbol5, + 6=>&$symbol6, + 7=>&$symbol7, + 8=>&$symbol8, + 10=>&$symbol10, + 11=>&$symbol11, + 13=>&$symbol13, + 14=>&$symbol14 + ); + + $table0 = new ParserState(0); + $table1 = new ParserState(1); + $table2 = new ParserState(2); + $table3 = new ParserState(3); + $table4 = new ParserState(4); + $table5 = new ParserState(5); + $table6 = new ParserState(6); + $table7 = new ParserState(7); + $table8 = new ParserState(8); + $table9 = new ParserState(9); + $table10 = new ParserState(10); + $table11 = new ParserState(11); + $table12 = new ParserState(12); + $table13 = new ParserState(13); + $table14 = new ParserState(14); + $table15 = new ParserState(15); + $table16 = new ParserState(16); + + $tableDefinition0 = array( + + 3=>new ParserAction($this->none, $table1), + 4=>new ParserAction($this->none, $table2), + 10=>new ParserAction($this->shift, $table3), + 12=>new ParserAction($this->none, $table4), + 13=>new ParserAction($this->shift, $table5), + 14=>new 
ParserAction($this->shift, $table6) + ); + + $tableDefinition1 = array( + + 1=>new ParserAction($this->accept) + ); + + $tableDefinition2 = array( + + 5=>new ParserAction($this->shift, $table7), + 6=>new ParserAction($this->shift, $table8), + 7=>new ParserAction($this->shift, $table9), + 8=>new ParserAction($this->shift, $table10) + ); + + $tableDefinition3 = array( + + 4=>new ParserAction($this->none, $table11), + 10=>new ParserAction($this->shift, $table3), + 12=>new ParserAction($this->none, $table4), + 13=>new ParserAction($this->shift, $table5), + 14=>new ParserAction($this->shift, $table6) + ); + + $tableDefinition4 = array( + + 5=>new ParserAction($this->reduce, $table6), + 6=>new ParserAction($this->reduce, $table6), + 7=>new ParserAction($this->reduce, $table6), + 8=>new ParserAction($this->reduce, $table6), + 11=>new ParserAction($this->reduce, $table6) + ); + + $tableDefinition5 = array( + + 5=>new ParserAction($this->reduce, $table8), + 6=>new ParserAction($this->reduce, $table8), + 7=>new ParserAction($this->reduce, $table8), + 8=>new ParserAction($this->reduce, $table8), + 11=>new ParserAction($this->reduce, $table8) + ); + + $tableDefinition6 = array( + + 5=>new ParserAction($this->reduce, $table9), + 6=>new ParserAction($this->reduce, $table9), + 7=>new ParserAction($this->reduce, $table9), + 8=>new ParserAction($this->reduce, $table9), + 11=>new ParserAction($this->reduce, $table9) + ); + + $tableDefinition7 = array( + + 1=>new ParserAction($this->reduce, $table1) + ); + + $tableDefinition8 = array( + + 4=>new ParserAction($this->none, $table12), + 10=>new ParserAction($this->shift, $table3), + 12=>new ParserAction($this->none, $table4), + 13=>new ParserAction($this->shift, $table5), + 14=>new ParserAction($this->shift, $table6) + ); + + $tableDefinition9 = array( + + 4=>new ParserAction($this->none, $table13), + 10=>new ParserAction($this->shift, $table3), + 12=>new ParserAction($this->none, $table4), + 13=>new ParserAction($this->shift, $table5), 
+ 14=>new ParserAction($this->shift, $table6) + ); + + $tableDefinition10 = array( + + 9=>new ParserAction($this->none, $table14), + 13=>new ParserAction($this->shift, $table15) + ); + + $tableDefinition11 = array( + + 6=>new ParserAction($this->shift, $table8), + 7=>new ParserAction($this->shift, $table9), + 8=>new ParserAction($this->shift, $table10), + 11=>new ParserAction($this->shift, $table16) + ); + + $tableDefinition12 = array( + + 5=>new ParserAction($this->reduce, $table2), + 6=>new ParserAction($this->reduce, $table2), + 7=>new ParserAction($this->reduce, $table2), + 8=>new ParserAction($this->shift, $table10), + 11=>new ParserAction($this->reduce, $table2) + ); + + $tableDefinition13 = array( + + 5=>new ParserAction($this->reduce, $table3), + 6=>new ParserAction($this->reduce, $table3), + 7=>new ParserAction($this->reduce, $table3), + 8=>new ParserAction($this->shift, $table10), + 11=>new ParserAction($this->reduce, $table3) + ); + + $tableDefinition14 = array( + + 5=>new ParserAction($this->reduce, $table4), + 6=>new ParserAction($this->reduce, $table4), + 7=>new ParserAction($this->reduce, $table4), + 8=>new ParserAction($this->reduce, $table4), + 11=>new ParserAction($this->reduce, $table4) + ); + + $tableDefinition15 = array( + + 5=>new ParserAction($this->reduce, $table7), + 6=>new ParserAction($this->reduce, $table7), + 7=>new ParserAction($this->reduce, $table7), + 8=>new ParserAction($this->reduce, $table7), + 11=>new ParserAction($this->reduce, $table7) + ); + + $tableDefinition16 = array( + + 5=>new ParserAction($this->reduce, $table5), + 6=>new ParserAction($this->reduce, $table5), + 7=>new ParserAction($this->reduce, $table5), + 8=>new ParserAction($this->reduce, $table5), + 11=>new ParserAction($this->reduce, $table5) + ); + + $table0->setActions($tableDefinition0); + $table1->setActions($tableDefinition1); + $table2->setActions($tableDefinition2); + $table3->setActions($tableDefinition3); + $table4->setActions($tableDefinition4); + 
$table5->setActions($tableDefinition5); + $table6->setActions($tableDefinition6); + $table7->setActions($tableDefinition7); + $table8->setActions($tableDefinition8); + $table9->setActions($tableDefinition9); + $table10->setActions($tableDefinition10); + $table11->setActions($tableDefinition11); + $table12->setActions($tableDefinition12); + $table13->setActions($tableDefinition13); + $table14->setActions($tableDefinition14); + $table15->setActions($tableDefinition15); + $table16->setActions($tableDefinition16); + + $this->table = array( + + 0=>$table0, + 1=>$table1, + 2=>$table2, + 3=>$table3, + 4=>$table4, + 5=>$table5, + 6=>$table6, + 7=>$table7, + 8=>$table8, + 9=>$table9, + 10=>$table10, + 11=>$table11, + 12=>$table12, + 13=>$table13, + 14=>$table14, + 15=>$table15, + 16=>$table16 + ); + + $this->defaultActions = array( + + 7=>new ParserAction($this->reduce, $table1) + ); + + $this->productions = array( + + 0=>new ParserProduction($symbol0), + 1=>new ParserProduction($symbol3,2), + 2=>new ParserProduction($symbol4,3), + 3=>new ParserProduction($symbol4,3), + 4=>new ParserProduction($symbol4,3), + 5=>new ParserProduction($symbol4,3), + 6=>new ParserProduction($symbol4,1), + 7=>new ParserProduction($symbol9,1), + 8=>new ParserProduction($symbol12,1), + 9=>new ParserProduction($symbol12,1) + ); + + + + + //Setup Lexer + + $this->rules = array( + + 0=>"/^(?:\s+)/", + 1=>"/^(?:AND\b)/", + 2=>"/^(?:and\b)/", + 3=>"/^(?:et\b)/", + 4=>"/^(?:OR\b)/", + 5=>"/^(?:or\b)/", + 6=>"/^(?:ou\b)/", + 7=>"/^(?:IN\b)/", + 8=>"/^(?:in\b)/", + 9=>"/^(?:dans\b)/", + 10=>"/^(?:\()/", + 11=>"/^(?:\))/", + 12=>"/^(?:\")/", + 13=>"/^(?:\")/", + 14=>"/^(?:([^\"])*)/", + 15=>"/^(?:\w+)/", + 16=>"/^(?:$)/" + ); + + $this->conditions = array( + + "literal"=>new LexerConditions(array( 13,14), false), + "INITIAL"=>new LexerConditions(array( 0,1,2,3,4,5,6,7,8,9,10,11,12,15,16), true) + ); + + + } + + function parserPerformAction(&$thisS, &$yy, $yystate, &$s, $o) + { + +/* this == yyval */ + + 
+switch ($yystate) { +case 1: + return $thisS; + +break; +case 2: + $thisS = sprintf('(%s AND %s)', $s[$o-2]->text, $s[$o]->text); + +break; +case 3: + $thisS = sprintf('(%s OR %s)', $s[$o-2]->text, $s[$o]->text); + +break; +case 4: + $thisS = sprintf('(%s IN %s)', $s[$o-2]->text, $s[$o]->text); + +break; +case 5: + $thisS = $s[$o-1]; + +break; +case 6: + $thisS = sprintf('"%s"', $s[$o]->text); + +break; +} + + } + + function parserLex() + { + $token = $this->lexerLex(); // $end = 1 + + if (isset($token)) { + return $token; + } + + return $this->symbols["end"]; + } + + function parseError($str = "", ParserError $hash = null) + { + throw new Exception($str); + } + + function lexerError($str = "", LexerError $hash = null) + { + throw new Exception($str); + } + + function parse($input) + { + if (empty($this->table)) { + throw new Exception("Empty Table"); + } + $this->eof = new ParserSymbol("Eof", 1); + $firstAction = new ParserAction(0, $this->table[0]); + $firstCachedAction = new ParserCachedAction($firstAction); + $stack = array($firstCachedAction); + $stackCount = 1; + $vstack = array(null); + $vstackCount = 1; + $yy = null; + $_yy = null; + $recovering = 0; + $symbol = null; + $action = null; + $errStr = ""; + $preErrorSymbol = null; + $state = null; + + $this->setInput($input); + + while (true) { + // retrieve state number from top of stack + $state = $stack[$stackCount - 1]->action->state; + // use default actions if available + if ($state != null && isset($this->defaultActions[$state->index])) { + $action = $this->defaultActions[$state->index]; + } else { + if (empty($symbol) == true) { + $symbol = $this->parserLex(); + } + // read action for current state and first input + if (isset($state) && isset($state->actions[$symbol->index])) { + //$action = $this->table[$state][$symbol]; + $action = $state->actions[$symbol->index]; + } else { + $action = null; + } + } + + if ($action == null) { + if ($recovering == 0) { + // Report error + $expected = array(); + 
foreach($this->table[$state->index]->actions as $p => $item) { + if (!empty($this->terminals[$p]) && $p > 2) { + $expected[] = $this->terminals[$p]->name; + } + } + + $errStr = "Parse error on line " . ($this->yy->lineNo + 1) . ":\n" . $this->showPosition() . "\nExpecting " . implode(", ", $expected) . ", got '" . (isset($this->terminals[$symbol->index]) ? $this->terminals[$symbol->index]->name : 'NOTHING') . "'"; + + $this->parseError($errStr, new ParserError($this->match, $state, $symbol, $this->yy->lineNo, $this->yy->loc, $expected)); + } + } + + if ($state === null || $action === null) { + break; + } + + switch ($action->action) { + case 1: + // shift + //$this->shiftCount++; + $stack[] = new ParserCachedAction($action, $symbol); + $stackCount++; + + $vstack[] = clone($this->yy); + $vstackCount++; + + $symbol = ""; + if ($preErrorSymbol == null) { // normal execution/no error + $yy = clone($this->yy); + if ($recovering > 0) $recovering--; + } else { // error just occurred, resume old look ahead f/ before error + $symbol = $preErrorSymbol; + $preErrorSymbol = null; + } + break; + + case 2: + // reduce + $len = $this->productions[$action->state->index]->len; + // perform semantic action + $_yy = $vstack[$vstackCount - $len];// default to $S = $1 + // default location, uses first token for firsts, last for lasts + + if (isset($this->ranges)) { + //TODO: add ranges + } + + $r = $this->parserPerformAction($_yy->text, $yy, $action->state->index, $vstack, $vstackCount - 1); + + if (isset($r)) { + return $r; + } + + // pop off stack + while ($len > 0) { + $len--; + + array_pop($stack); + $stackCount--; + + array_pop($vstack); + $vstackCount--; + } + + if (is_null($_yy)) + { + $vstack[] = new ParserValue(); + } + else + { + $vstack[] = $_yy; + } + $vstackCount++; + + $nextSymbol = $this->productions[$action->state->index]->symbol; + // goto new state = table[STATE][NONTERMINAL] + $nextState = $stack[$stackCount - 1]->action->state; + $nextAction = 
$nextState->actions[$nextSymbol->index]; + + $stack[] = new ParserCachedAction($nextAction, $nextSymbol); + $stackCount++; + + break; + + case 3: + // accept + return true; + } + + } + + return true; + } + + + /* Jison generated lexer */ + public $eof; + public $yy = null; + public $match = ""; + public $matched = ""; + public $conditionStack = array(); + public $conditionStackCount = 0; + public $rules = array(); + public $conditions = array(); + public $done = false; + public $less; + public $more; + public $input; + public $offset; + public $ranges; + public $flex = false; + + function setInput($input) + { + $this->input = $input; + $this->more = $this->less = $this->done = false; + $this->yy = new ParserValue(); + $this->conditionStack = array('INITIAL'); + $this->conditionStackCount = 1; + + if (isset($this->ranges)) { + $loc = $this->yy->loc = new ParserLocation(); + $loc->Range(new ParserRange(0, 0)); + } else { + $this->yy->loc = new ParserLocation(); + } + $this->offset = 0; + } + + function input() + { /* Consumes one character from the input string, appends it to the current match and returns it. */ + $ch = $this->input[0]; + $this->yy->text .= $ch; + $this->yy->leng++; + $this->offset++; + $this->match .= $ch; + $this->matched .= $ch; + $lines = preg_match("/(?:\r\n?|\n).*/", $ch); + if ($lines > 0) { /* preg_match() returns an int match count, which is not countable */ + $this->yy->lineNo++; + $this->yy->loc->lastLine++; /* lastLine is a ParserLocation field, not a ParserValue field */ + } else { + $this->yy->loc->lastColumn++; + } + if (isset($this->ranges)) { + $this->yy->loc->range->y++; + } + + $this->input = substr($this->input, 1); /* the input is a string: drop the consumed character */ + return $ch; + } + + function unput($ch) + { /* Pushes $ch back in front of the remaining input and rewinds the position bookkeeping. */ + $len = strlen($ch); + $lines = preg_split("/(?:\r\n?|\n)/", $ch); /* split on line breaks; explode() would treat the pattern as a literal delimiter */ + $linesCount = count($lines); + + $this->input = $ch . 
$this->input; + $this->yy->text = substr($this->yy->text, 0, $len - 1); + //$this->yylen -= $len; + $this->offset -= $len; + $oldLines = preg_split("/(?:\r\n?|\n)/", $this->match); /* same line-break split as for $lines above */ + $oldLinesCount = count($oldLines); + $this->match = substr($this->match, 0, strlen($this->match) - 1); + $this->matched = substr($this->matched, 0, strlen($this->matched) - 1); + + if (($linesCount - 1) > 0) $this->yy->lineNo -= $linesCount - 1; + $r = $this->yy->loc->range; + $oldLinesLength = (isset($oldLines[$oldLinesCount - $linesCount]) ? strlen($oldLines[$oldLinesCount - $linesCount]) : 0); + + $this->yy->loc = new ParserLocation( + $this->yy->loc->firstLine, + $this->yy->lineNo, + $this->yy->loc->firstColumn, + $this->yy->loc->firstLine, + (empty($lines) ? + ($linesCount == $oldLinesCount ? $this->yy->loc->firstColumn : 0) + $oldLinesLength : + $this->yy->loc->firstColumn - $len) + ); + + if (isset($this->ranges)) { + $this->yy->loc->range = array($r[0], $r[0] + $this->yy->leng - $len); + } + } + + function more() + { + $this->more = true; + } + + function pastInput() + { + $past = substr($this->matched, 0, strlen($this->matched) - strlen($this->match)); + return (strlen($past) > 20 ? '...' : '') . preg_replace("/\n/", "", substr($past, -20)); + } + + function upcomingInput() + { + $next = $this->match; + if (strlen($next) < 20) { + $next .= substr($this->input, 0, 20 - strlen($next)); + } + return preg_replace("/\n/", "", substr($next, 0, 20) . (strlen($next) > 20 ? '...' : '')); + } + + function showPosition() + { + $pre = $this->pastInput(); + + $c = ''; + for($i = 0, $preLength = strlen($pre); $i < $preLength; $i++) { + $c .= '-'; + } + + return $pre . $this->upcomingInput() . "\n" . $c . 
"^"; + } + + function next() + { /* Matches the rules of the current lexer condition against the remaining input; returns the token symbol, null when the rule action yields no token, or $this->eof at end of input. */ + if ($this->done == true) { + return $this->eof; + } + + if (empty($this->input)) { + $this->done = true; + } + + if ($this->more == false) { + $this->yy->text = ''; + $this->match = ''; + } + + $rules = $this->currentRules(); + for ($i = 0, $j = count($rules); $i < $j; $i++) { + preg_match($this->rules[$rules[$i]], $this->input, $tempMatch); + if ($tempMatch && (empty($match) || strlen($tempMatch[0]) > strlen($match[0]))) { /* longest match wins: compare string lengths */ + $match = $tempMatch; + $index = $i; + if (isset($this->flex) && $this->flex == false) { + break; + } + } + } + if ( !empty($match) ) { /* $match stays undefined when no rule matched */ + $matchCount = strlen($match[0]); + $lineCount = preg_match_all("/(?:\r\n?|\n).*/", $match[0], $lines); /* count every line break in the matched text */ + $line = ($lineCount ? $lines[0][$lineCount - 1] : false); /* last line-break segment, false when the match is single-line */ + $this->yy->lineNo += $lineCount; + + $this->yy->loc = new ParserLocation( + $this->yy->loc->lastLine, + $this->yy->lineNo + 1, + $this->yy->loc->lastColumn, + ($line ? + strlen($line) - (preg_match("/\r?\n?/", $line, $na) ? 
strlen($na[0]) : 0) : /* length of the last line without its leading line break */ + $this->yy->loc->lastColumn + $matchCount + ) + ); + + + $this->yy->text .= $match[0]; + $this->match .= $match[0]; + $this->matches = $match; + $this->matched .= $match[0]; + + $this->yy->leng = strlen($this->yy->text); + if (isset($this->ranges)) { + $this->yy->loc->range = new ParserRange($this->offset, $this->offset += $this->yy->leng); + } + $this->more = false; + $this->input = substr($this->input, $matchCount, strlen($this->input)); + $ruleIndex = $rules[$index]; + $nextCondition = $this->conditionStack[$this->conditionStackCount - 1]; + + $token = $this->lexerPerformAction($ruleIndex, $nextCondition); + + if ($this->done == true && empty($this->input) == false) { + $this->done = false; + } + + if (empty($token) == false) { + return $this->symbols[ + $token + ]; + } else { + return null; + } + } + + if (empty($this->input)) { + return $this->eof; + } else { + $this->lexerError("Lexical error on line " . ($this->yy->lineNo + 1) . ". Unrecognized text.\n" . 
$this->showPosition(), new LexerError("", -1, $this->yy->lineNo)); + return null; + } + } + + function lexerLex() + { + $r = $this->next(); + + while (is_null($r) && !$this->done) { + $r = $this->next(); + } + + return $r; + } + + function begin($condition) + { + $this->conditionStackCount++; + $this->conditionStack[] = $condition; + } + + function popState() + { + $this->conditionStackCount--; + return array_pop($this->conditionStack); + } + + function currentRules() + { + $peek = $this->conditionStack[$this->conditionStackCount - 1]; + return $this->conditions[$peek]->rules; + } + + function LexerPerformAction($avoidingNameCollisions, $YY_START = null) + { + + +; +switch($avoidingNameCollisions) { +case 0:/* skip whitespace */ +break; +case 1:return 6; +break; +case 2:return 6; +break; +case 3:return 6; +break; +case 4:return 7; +break; +case 5:return 7; +break; +case 6:return 7; +break; +case 7:return 8; +break; +case 8:return 8; +break; +case 9:return 8; +break; +case 10:return 10; +break; +case 11:return 11; +break; +case 12: + $this->begin('literal'); + +break; +case 13: + $this->popState(); + +break; +case 14:return 14; +break; +case 15:return 13; +break; +case 16:return 5; +break; +} + + } +} + +class ParserLocation +{ + public $firstLine = 1; + public $lastLine = 0; + public $firstColumn = 1; + public $lastColumn = 0; + public $range; + + public function __construct($firstLine = 1, $lastLine = 0, $firstColumn = 1, $lastColumn = 0) + { + $this->firstLine = $firstLine; + $this->lastLine = $lastLine; + $this->firstColumn = $firstColumn; + $this->lastColumn = $lastColumn; + } + + public function Range($range) + { + $this->range = $range; + } + + public function __clone() + { + return new ParserLocation($this->firstLine, $this->lastLine, $this->firstColumn, $this->lastColumn); + } +} + +class ParserValue +{ + public $leng = 0; + public $loc; + public $lineNo = 0; + public $text; + + function __clone() { + $clone = new ParserValue(); + $clone->leng = 
$this->leng; + if (isset($this->loc)) { + $clone->loc = clone $this->loc; + } + $clone->lineNo = $this->lineNo; + $clone->text = $this->text; + return $clone; + } +} + +class LexerConditions +{ + public $rules; + public $inclusive; + + function __construct($rules, $inclusive) + { + $this->rules = $rules; + $this->inclusive = $inclusive; + } +} + +class ParserProduction +{ + public $len = 0; + public $symbol; + + public function __construct($symbol, $len = 0) + { + $this->symbol = $symbol; + $this->len = $len; + } +} + +class ParserCachedAction +{ + public $action; + public $symbol; + + function __construct($action, $symbol = null) + { + $this->action = $action; + $this->symbol = $symbol; + } +} + +class ParserAction +{ + public $action; + public $state; + public $symbol; + + function __construct($action, &$state = null, &$symbol = null) + { + $this->action = $action; + $this->state = $state; + $this->symbol = $symbol; + } +} + +class ParserSymbol +{ + public $name; + public $index = -1; + public $symbols = array(); + public $symbolsByName = array(); + + function __construct($name, $index) + { + $this->name = $name; + $this->index = $index; + } + + public function addAction($a) + { + $this->symbols[$a->index] = $this->symbolsByName[$a->name] = $a; + } +} + +class ParserError +{ + public $text; + public $state; + public $symbol; + public $lineNo; + public $loc; + public $expected; + + function __construct($text, $state, $symbol, $lineNo, $loc, $expected) + { + $this->text = $text; + $this->state = $state; + $this->symbol = $symbol; + $this->lineNo = $lineNo; + $this->loc = $loc; + $this->expected = $expected; + } +} + +class LexerError +{ + public $text; + public $token; + public $lineNo; + + public function __construct($text, $token, $lineNo) + { + $this->text = $text; + $this->token = $token; + $this->lineNo = $lineNo; + } +} + +class ParserState +{ + public $index; + public $actions = array(); + + function __construct($index) + { + $this->index = $index; + } + + 
public function setActions(&$actions) + { + $this->actions = $actions; + } +} + +class ParserRange +{ + public $x; + public $y; + + function __construct($x, $y) + { + $this->x = $x; + $this->y = $y; + } +} \ No newline at end of file diff --git a/www/skins/prod/jquery.main-prod.js b/www/skins/prod/jquery.main-prod.js index a9dcdc9079..3f8790b9f0 100644 --- a/www/skins/prod/jquery.main-prod.js +++ b/www/skins/prod/jquery.main-prod.js @@ -466,6 +466,14 @@ function initAnswerForm() { }, success: function (datas) { + // DEBUG QUERY PARSER + var query = datas.parsed_query; + try { + query = JSON.parse(query); + } + catch (e) {} + console.log('Parsed Query:', query); + $('#answers').empty().append(datas.results).removeClass('loading'); From 12a757978f7860198986bf4ea9eb6e701e933a48 Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Mon, 18 Aug 2014 11:53:05 +0200 Subject: [PATCH 2/3] Update PoC parser with AST --- grammar/query.jison | 72 +++- .../Elastic/AST/AndExpression.php | 38 ++ .../SearchEngine/Elastic/AST/InExpression.php | 25 ++ .../SearchEngine/Elastic/AST/KeywordNode.php | 28 ++ .../Phrasea/SearchEngine/Elastic/AST/Node.php | 8 + .../SearchEngine/Elastic/AST/OrExpression.php | 38 ++ .../SearchEngine/Elastic/AST/PrefixNode.php | 27 ++ .../Elastic/AST/QuotedTextNode.php | 18 + .../SearchEngine/Elastic/AST/TextNode.php | 27 ++ .../SearchEngine/Elastic/QueryParser.php | 384 ++++++++++++------ 10 files changed, 523 insertions(+), 142 deletions(-) create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AndExpression.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/InExpression.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeywordNode.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/Node.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/OrExpression.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/PrefixNode.php create mode 100644 
lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php create mode 100644 lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php diff --git a/grammar/query.jison b/grammar/query.jison index eb80ebf4e2..c0d363f39a 100644 --- a/grammar/query.jison +++ b/grammar/query.jison @@ -21,6 +21,7 @@ "dans" return 'IN' "(" return '(' ")" return ')' +"*" return '*' '"' { //js this.begin('literal'); @@ -40,6 +41,7 @@ /* operator associations and precedence */ +%left 'WORD' %left 'AND' 'OR' %left 'IN' @@ -50,56 +52,98 @@ query - : expression EOF { + : expressions EOF { //js console.log('[QUERY]', $$); return $$; - //php return $$; + /*php + return $$; + */ } ; +expressions + : expression expressions { + //js + $$ = '('+$1+' DEF_OP '+$2+')'; + console.log('[DEF_OP]', $$); + // $$ = sprintf('(%s DEF_OP %s)', $1->text, $2->text); + /*php + $$ = new AST\AndExpression($1->text, $2->text); + */ + } + | expression + ; + expression : expression AND expression { //js $$ = '('+$1+' AND '+$3+')'; console.log('[AND]', $$); - //php $$ = sprintf('(%s AND %s)', $1->text, $3->text); + /*php + $$ = new AST\AndExpression($1->text, $3->text); + */ } | expression OR expression { //js $$ = '('+$1+' OR '+$3+')'; console.log('[OR]', $$); - //php $$ = sprintf('(%s OR %s)', $1->text, $3->text); + /*php + $$ = new AST\OrExpression($1->text, $3->text); + */ } - | expression IN location { + | expression IN keyword { //js $$ = '('+$1+' IN '+$3+')'; console.log('[IN]', $$); - //php $$ = sprintf('(%s IN %s)', $1->text, $3->text); + /*php + $$ = new AST\InExpression($3->text, $1->text); + */ } | '(' expression ')' { //js $$ = $2; //php $$ = $2; } - | text { + | prefix + | text + ; + +keyword + : WORD { //js - $$ = '"'+$1+'"'; - console.log('[TEXT]', $$); - //php $$ = sprintf('"%s"', $1->text); + $$ = '<'+$1+'>'; + console.log('[FIELD]', $$); + //php $$ = new AST\KeywordNode($1->text); } ; -location - : WORD +prefix + : WORD '*' { + //js + $$ = $1+'*'; + console.log('[PREFIX]', $$); + //php $$ = new 
AST\PrefixNode($1->text); + } ; text - : WORD - | LITERAL + : WORD { + //js + $$ = '"'+$1+'"'; + console.log('[WORD]', $$); + //php $$ = new AST\TextNode($1->text); + } + | LITERAL { + //js + $$ = '"'+$1+'"'; + console.log('[LITERAL]', $$); + //php $$ = new AST\QuotedTextNode($1->text); + } ; //option namespace:Alchemy\Phrasea\SearchEngine\Elastic //option class:QueryParser +//option use:AST\Node, AST\TextNode, AST\QuotedTextNode, AST\PrefixNode, AST\KeywordNode, AST\AndExpression, AST\OrExpression, AST\InExpression; //option fileName:QueryParser.php diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AndExpression.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AndExpression.php new file mode 100644 index 0000000000..f435123879 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/AndExpression.php @@ -0,0 +1,38 @@ +members[] = $left; + $this->members[] = $right; + } + + public function getMembers() + { + return $this->members; + } + + public function getQuery($field = '_all') + { + $rules = array(); + foreach ($this->members as $member) { + $rules[] = $member->getQuery($field); + } + + return array( + 'bool' => array( + 'must' => count($rules) > 1 ? 
$rules : $rules[0] + ) + ); + } + + public function __toString() + { + return sprintf('(%s)', implode(' AND ', $this->members)); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/InExpression.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/InExpression.php new file mode 100644 index 0000000000..4cdc5f501c --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/InExpression.php @@ -0,0 +1,25 @@ +keyword = $keyword; + $this->expression = $expression; + } + + public function getQuery() + { + return $this->expression->getQuery($this->keyword->getValue()); + } + + public function __toString() + { + return sprintf('(%s IN %s)', $this->expression, $this->keyword); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeywordNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeywordNode.php new file mode 100644 index 0000000000..3b42f719d2 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/KeywordNode.php @@ -0,0 +1,28 @@ +keyword = $keyword; + } + + public function getValue() + { + return $this->keyword; + } + + public function getQuery() + { + throw new LogicException("A keyword can't be converted to a query."); + } + + public function __toString() + { + return sprintf('<%s>', $this->keyword); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/Node.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/Node.php new file mode 100644 index 0000000000..3a97246444 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/Node.php @@ -0,0 +1,8 @@ +members[] = $left; + $this->members[] = $right; + } + + public function getMembers() + { + return $this->members; + } + + public function getQuery($field = '_all') + { + $rules = array(); + foreach ($this->members as $member) { + $rules[] = $member->getQuery($field); + } + + return array( + 'bool' => array( + 'should' => count($rules) > 1 ? 
$rules : $rules[0] + ) + ); + } + + public function __toString() + { + return sprintf('(%s)', implode(' OR ', $this->members)); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/PrefixNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/PrefixNode.php new file mode 100644 index 0000000000..67b8803325 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/PrefixNode.php @@ -0,0 +1,27 @@ +prefix = $prefix; + } + + public function getQuery($field = '_all') + { + return array( + 'prefix' => array( + $field => $this->prefix + ) + ); + } + + public function __toString() + { + return sprintf('prefix("%s")', $this->prefix); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php new file mode 100644 index 0000000000..7740a5d828 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/QuotedTextNode.php @@ -0,0 +1,18 @@ + array( + $field => array( + 'query' => $this->text, + 'operator' => 'and' + ) + ) + ); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php new file mode 100644 index 0000000000..d545103cc9 --- /dev/null +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/AST/TextNode.php @@ -0,0 +1,27 @@ +text = $text; + } + + public function getQuery($field = '_all') + { + return array( + 'match' => array( + $field => $this->text + ) + ); + } + + public function __toString() + { + return sprintf('"%s"', $this->text); + } +} diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php index d2356fc475..cbfc49ba00 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php +++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/QueryParser.php @@ -4,7 +4,7 @@ namespace Alchemy\Phrasea\SearchEngine\Elastic; use Exception; - +use AST\Node, AST\TextNode, AST\QuotedTextNode, AST\PrefixNode, 
AST\KeywordNode, AST\AndExpression, AST\OrExpression, AST\InExpression; class QueryParser { @@ -33,17 +33,20 @@ class QueryParser $symbol1 = new ParserSymbol("end", 1); $symbol2 = new ParserSymbol("error", 2); $symbol3 = new ParserSymbol("query", 3); - $symbol4 = new ParserSymbol("expression", 4); + $symbol4 = new ParserSymbol("expressions", 4); $symbol5 = new ParserSymbol("EOF", 5); - $symbol6 = new ParserSymbol("AND", 6); - $symbol7 = new ParserSymbol("OR", 7); - $symbol8 = new ParserSymbol("IN", 8); - $symbol9 = new ParserSymbol("location", 9); - $symbol10 = new ParserSymbol("(", 10); - $symbol11 = new ParserSymbol(")", 11); - $symbol12 = new ParserSymbol("text", 12); - $symbol13 = new ParserSymbol("WORD", 13); - $symbol14 = new ParserSymbol("LITERAL", 14); + $symbol6 = new ParserSymbol("expression", 6); + $symbol7 = new ParserSymbol("AND", 7); + $symbol8 = new ParserSymbol("OR", 8); + $symbol9 = new ParserSymbol("IN", 9); + $symbol10 = new ParserSymbol("keyword", 10); + $symbol11 = new ParserSymbol("(", 11); + $symbol12 = new ParserSymbol(")", 12); + $symbol13 = new ParserSymbol("prefix", 13); + $symbol14 = new ParserSymbol("text", 14); + $symbol15 = new ParserSymbol("WORD", 15); + $symbol16 = new ParserSymbol("*", 16); + $symbol17 = new ParserSymbol("LITERAL", 17); $this->symbols[0] = $symbol0; $this->symbols["accept"] = $symbol0; $this->symbols[1] = $symbol1; @@ -53,38 +56,45 @@ class QueryParser $this->symbols[3] = $symbol3; $this->symbols["query"] = $symbol3; $this->symbols[4] = $symbol4; - $this->symbols["expression"] = $symbol4; + $this->symbols["expressions"] = $symbol4; $this->symbols[5] = $symbol5; $this->symbols["EOF"] = $symbol5; $this->symbols[6] = $symbol6; - $this->symbols["AND"] = $symbol6; + $this->symbols["expression"] = $symbol6; $this->symbols[7] = $symbol7; - $this->symbols["OR"] = $symbol7; + $this->symbols["AND"] = $symbol7; $this->symbols[8] = $symbol8; - $this->symbols["IN"] = $symbol8; + $this->symbols["OR"] = $symbol8; 
$this->symbols[9] = $symbol9; - $this->symbols["location"] = $symbol9; + $this->symbols["IN"] = $symbol9; $this->symbols[10] = $symbol10; - $this->symbols["("] = $symbol10; + $this->symbols["keyword"] = $symbol10; $this->symbols[11] = $symbol11; - $this->symbols[")"] = $symbol11; + $this->symbols["("] = $symbol11; $this->symbols[12] = $symbol12; - $this->symbols["text"] = $symbol12; + $this->symbols[")"] = $symbol12; $this->symbols[13] = $symbol13; - $this->symbols["WORD"] = $symbol13; + $this->symbols["prefix"] = $symbol13; $this->symbols[14] = $symbol14; - $this->symbols["LITERAL"] = $symbol14; + $this->symbols["text"] = $symbol14; + $this->symbols[15] = $symbol15; + $this->symbols["WORD"] = $symbol15; + $this->symbols[16] = $symbol16; + $this->symbols["*"] = $symbol16; + $this->symbols[17] = $symbol17; + $this->symbols["LITERAL"] = $symbol17; $this->terminals = array( 2=>&$symbol2, 5=>&$symbol5, - 6=>&$symbol6, 7=>&$symbol7, 8=>&$symbol8, - 10=>&$symbol10, + 9=>&$symbol9, 11=>&$symbol11, - 13=>&$symbol13, - 14=>&$symbol14 + 12=>&$symbol12, + 15=>&$symbol15, + 16=>&$symbol16, + 17=>&$symbol17 ); $table0 = new ParserState(0); @@ -104,15 +114,21 @@ class QueryParser $table14 = new ParserState(14); $table15 = new ParserState(15); $table16 = new ParserState(16); + $table17 = new ParserState(17); + $table18 = new ParserState(18); + $table19 = new ParserState(19); + $table20 = new ParserState(20); $tableDefinition0 = array( 3=>new ParserAction($this->none, $table1), 4=>new ParserAction($this->none, $table2), - 10=>new ParserAction($this->shift, $table3), - 12=>new ParserAction($this->none, $table4), - 13=>new ParserAction($this->shift, $table5), - 14=>new ParserAction($this->shift, $table6) + 6=>new ParserAction($this->none, $table3), + 11=>new ParserAction($this->shift, $table4), + 13=>new ParserAction($this->none, $table5), + 14=>new ParserAction($this->none, $table6), + 15=>new ParserAction($this->shift, $table7), + 17=>new ParserAction($this->shift, $table8) ); 
$tableDefinition1 = array( @@ -122,128 +138,197 @@ class QueryParser $tableDefinition2 = array( - 5=>new ParserAction($this->shift, $table7), - 6=>new ParserAction($this->shift, $table8), - 7=>new ParserAction($this->shift, $table9), - 8=>new ParserAction($this->shift, $table10) + 5=>new ParserAction($this->shift, $table9) ); $tableDefinition3 = array( - 4=>new ParserAction($this->none, $table11), - 10=>new ParserAction($this->shift, $table3), - 12=>new ParserAction($this->none, $table4), - 13=>new ParserAction($this->shift, $table5), - 14=>new ParserAction($this->shift, $table6) + 4=>new ParserAction($this->none, $table10), + 5=>new ParserAction($this->reduce, $table3), + 6=>new ParserAction($this->none, $table3), + 7=>new ParserAction($this->shift, $table11), + 8=>new ParserAction($this->shift, $table12), + 9=>new ParserAction($this->shift, $table13), + 11=>new ParserAction($this->shift, $table4), + 13=>new ParserAction($this->none, $table5), + 14=>new ParserAction($this->none, $table6), + 15=>new ParserAction($this->shift, $table7), + 17=>new ParserAction($this->shift, $table8) ); $tableDefinition4 = array( - 5=>new ParserAction($this->reduce, $table6), - 6=>new ParserAction($this->reduce, $table6), - 7=>new ParserAction($this->reduce, $table6), - 8=>new ParserAction($this->reduce, $table6), - 11=>new ParserAction($this->reduce, $table6) + 6=>new ParserAction($this->none, $table14), + 11=>new ParserAction($this->shift, $table4), + 13=>new ParserAction($this->none, $table5), + 14=>new ParserAction($this->none, $table6), + 15=>new ParserAction($this->shift, $table7), + 17=>new ParserAction($this->shift, $table8) ); $tableDefinition5 = array( 5=>new ParserAction($this->reduce, $table8), - 6=>new ParserAction($this->reduce, $table8), 7=>new ParserAction($this->reduce, $table8), 8=>new ParserAction($this->reduce, $table8), - 11=>new ParserAction($this->reduce, $table8) + 9=>new ParserAction($this->reduce, $table8), + 11=>new ParserAction($this->reduce, $table8), + 
12=>new ParserAction($this->reduce, $table8), + 15=>new ParserAction($this->reduce, $table8), + 17=>new ParserAction($this->reduce, $table8) ); $tableDefinition6 = array( 5=>new ParserAction($this->reduce, $table9), - 6=>new ParserAction($this->reduce, $table9), 7=>new ParserAction($this->reduce, $table9), 8=>new ParserAction($this->reduce, $table9), - 11=>new ParserAction($this->reduce, $table9) + 9=>new ParserAction($this->reduce, $table9), + 11=>new ParserAction($this->reduce, $table9), + 12=>new ParserAction($this->reduce, $table9), + 15=>new ParserAction($this->reduce, $table9), + 17=>new ParserAction($this->reduce, $table9) ); $tableDefinition7 = array( - 1=>new ParserAction($this->reduce, $table1) + 5=>new ParserAction($this->reduce, $table12), + 7=>new ParserAction($this->reduce, $table12), + 8=>new ParserAction($this->reduce, $table12), + 9=>new ParserAction($this->reduce, $table12), + 11=>new ParserAction($this->reduce, $table12), + 12=>new ParserAction($this->reduce, $table12), + 15=>new ParserAction($this->reduce, $table12), + 16=>new ParserAction($this->shift, $table15), + 17=>new ParserAction($this->reduce, $table12) ); $tableDefinition8 = array( - 4=>new ParserAction($this->none, $table12), - 10=>new ParserAction($this->shift, $table3), - 12=>new ParserAction($this->none, $table4), - 13=>new ParserAction($this->shift, $table5), - 14=>new ParserAction($this->shift, $table6) + 5=>new ParserAction($this->reduce, $table13), + 7=>new ParserAction($this->reduce, $table13), + 8=>new ParserAction($this->reduce, $table13), + 9=>new ParserAction($this->reduce, $table13), + 11=>new ParserAction($this->reduce, $table13), + 12=>new ParserAction($this->reduce, $table13), + 15=>new ParserAction($this->reduce, $table13), + 17=>new ParserAction($this->reduce, $table13) ); $tableDefinition9 = array( - 4=>new ParserAction($this->none, $table13), - 10=>new ParserAction($this->shift, $table3), - 12=>new ParserAction($this->none, $table4), - 13=>new 
ParserAction($this->shift, $table5), - 14=>new ParserAction($this->shift, $table6) + 1=>new ParserAction($this->reduce, $table1) ); $tableDefinition10 = array( - 9=>new ParserAction($this->none, $table14), - 13=>new ParserAction($this->shift, $table15) + 5=>new ParserAction($this->reduce, $table2) ); $tableDefinition11 = array( - 6=>new ParserAction($this->shift, $table8), - 7=>new ParserAction($this->shift, $table9), - 8=>new ParserAction($this->shift, $table10), - 11=>new ParserAction($this->shift, $table16) + 6=>new ParserAction($this->none, $table16), + 11=>new ParserAction($this->shift, $table4), + 13=>new ParserAction($this->none, $table5), + 14=>new ParserAction($this->none, $table6), + 15=>new ParserAction($this->shift, $table7), + 17=>new ParserAction($this->shift, $table8) ); $tableDefinition12 = array( - 5=>new ParserAction($this->reduce, $table2), - 6=>new ParserAction($this->reduce, $table2), - 7=>new ParserAction($this->reduce, $table2), - 8=>new ParserAction($this->shift, $table10), - 11=>new ParserAction($this->reduce, $table2) + 6=>new ParserAction($this->none, $table17), + 11=>new ParserAction($this->shift, $table4), + 13=>new ParserAction($this->none, $table5), + 14=>new ParserAction($this->none, $table6), + 15=>new ParserAction($this->shift, $table7), + 17=>new ParserAction($this->shift, $table8) ); $tableDefinition13 = array( - 5=>new ParserAction($this->reduce, $table3), - 6=>new ParserAction($this->reduce, $table3), - 7=>new ParserAction($this->reduce, $table3), - 8=>new ParserAction($this->shift, $table10), - 11=>new ParserAction($this->reduce, $table3) + 10=>new ParserAction($this->none, $table18), + 15=>new ParserAction($this->shift, $table19) ); $tableDefinition14 = array( - 5=>new ParserAction($this->reduce, $table4), - 6=>new ParserAction($this->reduce, $table4), - 7=>new ParserAction($this->reduce, $table4), - 8=>new ParserAction($this->reduce, $table4), - 11=>new ParserAction($this->reduce, $table4) + 7=>new ParserAction($this->shift, 
$table11), + 8=>new ParserAction($this->shift, $table12), + 9=>new ParserAction($this->shift, $table13), + 12=>new ParserAction($this->shift, $table20) ); $tableDefinition15 = array( - 5=>new ParserAction($this->reduce, $table7), - 6=>new ParserAction($this->reduce, $table7), - 7=>new ParserAction($this->reduce, $table7), - 8=>new ParserAction($this->reduce, $table7), - 11=>new ParserAction($this->reduce, $table7) + 5=>new ParserAction($this->reduce, $table11), + 7=>new ParserAction($this->reduce, $table11), + 8=>new ParserAction($this->reduce, $table11), + 9=>new ParserAction($this->reduce, $table11), + 11=>new ParserAction($this->reduce, $table11), + 12=>new ParserAction($this->reduce, $table11), + 15=>new ParserAction($this->reduce, $table11), + 17=>new ParserAction($this->reduce, $table11) ); $tableDefinition16 = array( + 5=>new ParserAction($this->reduce, $table4), + 7=>new ParserAction($this->reduce, $table4), + 8=>new ParserAction($this->reduce, $table4), + 9=>new ParserAction($this->shift, $table13), + 11=>new ParserAction($this->reduce, $table4), + 12=>new ParserAction($this->reduce, $table4), + 15=>new ParserAction($this->reduce, $table4), + 17=>new ParserAction($this->reduce, $table4) + ); + + $tableDefinition17 = array( + 5=>new ParserAction($this->reduce, $table5), - 6=>new ParserAction($this->reduce, $table5), 7=>new ParserAction($this->reduce, $table5), 8=>new ParserAction($this->reduce, $table5), - 11=>new ParserAction($this->reduce, $table5) + 9=>new ParserAction($this->shift, $table13), + 11=>new ParserAction($this->reduce, $table5), + 12=>new ParserAction($this->reduce, $table5), + 15=>new ParserAction($this->reduce, $table5), + 17=>new ParserAction($this->reduce, $table5) + ); + + $tableDefinition18 = array( + + 5=>new ParserAction($this->reduce, $table6), + 7=>new ParserAction($this->reduce, $table6), + 8=>new ParserAction($this->reduce, $table6), + 9=>new ParserAction($this->reduce, $table6), + 11=>new ParserAction($this->reduce, $table6), + 
12=>new ParserAction($this->reduce, $table6), + 15=>new ParserAction($this->reduce, $table6), + 17=>new ParserAction($this->reduce, $table6) + ); + + $tableDefinition19 = array( + + 5=>new ParserAction($this->reduce, $table10), + 7=>new ParserAction($this->reduce, $table10), + 8=>new ParserAction($this->reduce, $table10), + 9=>new ParserAction($this->reduce, $table10), + 11=>new ParserAction($this->reduce, $table10), + 12=>new ParserAction($this->reduce, $table10), + 15=>new ParserAction($this->reduce, $table10), + 17=>new ParserAction($this->reduce, $table10) + ); + + $tableDefinition20 = array( + + 5=>new ParserAction($this->reduce, $table7), + 7=>new ParserAction($this->reduce, $table7), + 8=>new ParserAction($this->reduce, $table7), + 9=>new ParserAction($this->reduce, $table7), + 11=>new ParserAction($this->reduce, $table7), + 12=>new ParserAction($this->reduce, $table7), + 15=>new ParserAction($this->reduce, $table7), + 17=>new ParserAction($this->reduce, $table7) ); $table0->setActions($tableDefinition0); @@ -263,6 +348,10 @@ class QueryParser $table14->setActions($tableDefinition14); $table15->setActions($tableDefinition15); $table16->setActions($tableDefinition16); + $table17->setActions($tableDefinition17); + $table18->setActions($tableDefinition18); + $table19->setActions($tableDefinition19); + $table20->setActions($tableDefinition20); $this->table = array( @@ -282,26 +371,35 @@ class QueryParser 13=>$table13, 14=>$table14, 15=>$table15, - 16=>$table16 + 16=>$table16, + 17=>$table17, + 18=>$table18, + 19=>$table19, + 20=>$table20 ); $this->defaultActions = array( - 7=>new ParserAction($this->reduce, $table1) + 9=>new ParserAction($this->reduce, $table1), + 10=>new ParserAction($this->reduce, $table2) ); $this->productions = array( 0=>new ParserProduction($symbol0), 1=>new ParserProduction($symbol3,2), - 2=>new ParserProduction($symbol4,3), - 3=>new ParserProduction($symbol4,3), - 4=>new ParserProduction($symbol4,3), - 5=>new ParserProduction($symbol4,3), 
- 6=>new ParserProduction($symbol4,1), - 7=>new ParserProduction($symbol9,1), - 8=>new ParserProduction($symbol12,1), - 9=>new ParserProduction($symbol12,1) + 2=>new ParserProduction($symbol4,2), + 3=>new ParserProduction($symbol4,1), + 4=>new ParserProduction($symbol6,3), + 5=>new ParserProduction($symbol6,3), + 6=>new ParserProduction($symbol6,3), + 7=>new ParserProduction($symbol6,3), + 8=>new ParserProduction($symbol6,1), + 9=>new ParserProduction($symbol6,1), + 10=>new ParserProduction($symbol10,1), + 11=>new ParserProduction($symbol13,2), + 12=>new ParserProduction($symbol14,1), + 13=>new ParserProduction($symbol14,1) ); @@ -323,17 +421,18 @@ class QueryParser 9=>"/^(?:dans\b)/", 10=>"/^(?:\()/", 11=>"/^(?:\))/", - 12=>"/^(?:\")/", + 12=>"/^(?:\*)/", 13=>"/^(?:\")/", - 14=>"/^(?:([^\"])*)/", - 15=>"/^(?:\w+)/", - 16=>"/^(?:$)/" + 14=>"/^(?:\")/", + 15=>"/^(?:([^\"])*)/", + 16=>"/^(?:\w+)/", + 17=>"/^(?:$)/" ); $this->conditions = array( - "literal"=>new LexerConditions(array( 13,14), false), - "INITIAL"=>new LexerConditions(array( 0,1,2,3,4,5,6,7,8,9,10,11,12,15,16), true) + "literal"=>new LexerConditions(array( 14,15), false), + "INITIAL"=>new LexerConditions(array( 0,1,2,3,4,5,6,7,8,9,10,11,12,13,16,17), true) ); @@ -347,27 +446,54 @@ class QueryParser switch ($yystate) { case 1: - return $thisS; + + return $thisS; + break; case 2: - $thisS = sprintf('(%s AND %s)', $s[$o-2]->text, $s[$o]->text); - -break; -case 3: - $thisS = sprintf('(%s OR %s)', $s[$o-2]->text, $s[$o]->text); + // $thisS = sprintf('(%s DEF_OP %s)', $s[$o-1]->text, $s[$o]->text); + + $thisS = new AST\AndExpression($s[$o-1]->text, $s[$o]->text); + break; case 4: - $thisS = sprintf('(%s IN %s)', $s[$o-2]->text, $s[$o]->text); + + $thisS = new AST\AndExpression($s[$o-2]->text, $s[$o]->text); + break; case 5: - $thisS = $s[$o-1]; + + $thisS = new AST\OrExpression($s[$o-2]->text, $s[$o]->text); + break; case 6: - $thisS = sprintf('"%s"', $s[$o]->text); + + $thisS = new 
AST\InExpression($s[$o]->text, $s[$o-2]->text); + + +break; +case 7: + $thisS = $s[$o-1]; + +break; +case 10: + $thisS = new AST\KeywordNode($s[$o]->text); + +break; +case 11: + $thisS = new AST\PrefixNode($s[$o-1]->text); + +break; +case 12: + $thisS = new AST\TextNode($s[$o]->text); + +break; +case 13: + $thisS = new AST\QuotedTextNode($s[$o]->text); break; } @@ -685,7 +811,7 @@ break; } } } - if ( $match ) { + if ( isset($match) && $match ) { $matchCount = strlen($match[0]); $lineCount = preg_match("/(?:\r\n?|\n).*/", $match[0], $lines); $line = ($lines ? $lines[$lineCount - 1] : false); @@ -776,41 +902,43 @@ break; switch($avoidingNameCollisions) { case 0:/* skip whitespace */ break; -case 1:return 6; +case 1:return 7; break; -case 2:return 6; +case 2:return 7; break; -case 3:return 6; +case 3:return 7; break; -case 4:return 7; +case 4:return 8; break; -case 5:return 7; +case 5:return 8; break; -case 6:return 7; +case 6:return 8; break; -case 7:return 8; +case 7:return 9; break; -case 8:return 8; +case 8:return 9; break; -case 9:return 8; +case 9:return 9; break; -case 10:return 10; +case 10:return 11; break; -case 11:return 11; +case 11:return 12; break; -case 12: +case 12:return 16; +break; +case 13: $this->begin('literal'); break; -case 13: +case 14: $this->popState(); break; -case 14:return 14; +case 15:return 17; break; -case 15:return 13; +case 16:return 15; break; -case 16:return 5; +case 17:return 5; break; } From 3f3ad4e0da391aea2cce843e252c81005051a2a9 Mon Sep 17 00:00:00 2001 From: Mathieu Darse Date: Mon, 18 Aug 2014 11:55:07 +0200 Subject: [PATCH 3/3] Passthrough ES query for debugging --- .../Elastic/ElasticSearchEngine.php | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php index 09f0aa5577..083cbaebfd 100644 --- a/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php 
+++ b/lib/Alchemy/Phrasea/SearchEngine/Elastic/ElasticSearchEngine.php @@ -272,28 +272,32 @@ class ElasticSearchEngine implements SearchEngineInterface /** * {@inheritdoc} */ - public function query($query, $offset, $perPage, SearchEngineOptions $options = null) + public function query($string, $offset, $perPage, SearchEngineOptions $options = null) { $parser = new QueryParser(); + $ast = $parser->parse($string); + $query = $ast->getQuery(); - $query = 'all' !== strtolower($query) ? $query : ''; - $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions()); - $params['from'] = $offset; - $params['size'] = $perPage; + // $query = 'all' !== strtolower($query) ? $query : ''; + // $params = $this->createQueryParams($query, $options ?: new SearchEngineOptions()); + // $params['from'] = $offset; + // $params['size'] = $perPage; - $res = $this->doExecute('search', $params); + // $res = $this->doExecute('search', $params); $results = new ArrayCollection(); $suggestions = new ArrayCollection(); - $n = 0; + // $n = 0; - foreach ($res['hits']['hits'] as $hit) { - $databoxId = is_array($hit['fields']['databox_id']) ? array_pop($hit['fields']['databox_id']) : $hit['fields']['databox_id']; - $recordId = is_array($hit['fields']['record_id']) ? array_pop($hit['fields']['record_id']) : $hit['fields']['record_id']; - $results[] = new \record_adapter($this->app, $databoxId, $recordId, $n++); - } + // foreach ($res['hits']['hits'] as $hit) { + // $databoxId = is_array($hit['fields']['databox_id']) ? array_pop($hit['fields']['databox_id']) : $hit['fields']['databox_id']; + // $recordId = is_array($hit['fields']['record_id']) ? 
array_pop($hit['fields']['record_id']) : $hit['fields']['record_id']; + // $results[] = new \record_adapter($this->app, $databoxId, $recordId, $n++); + // } - return new SearchEngineResult($results, $parser->parse($query), $res['took'], $offset, $res['hits']['total'], $res['hits']['total'], null, null, $suggestions, [], $this->indexName); + $query['_ast'] = (string) $ast; + + return new SearchEngineResult($results, json_encode($query), null, null, null, null, null, null, $suggestions, [], $this->indexName); } /** @@ -413,17 +417,17 @@ class ElasticSearchEngine implements SearchEngineInterface private function createESQuery($query, SearchEngineOptions $options) { - $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); + // $preg = preg_match('/\s?(recordid|storyid)\s?=\s?([0-9]+)/i', $query, $matches, 0, 0); - $search = []; - if ($preg > 0) { - $search['bool']['must'][] = [ - 'term' => [ - 'record_id' => $matches[2], - ], - ]; - $query = ''; - } + // $search = []; + // if ($preg > 0) { + // $search['bool']['must'][] = [ + // 'term' => [ + // 'record_id' => $matches[2], + // ], + // ]; + // $query = ''; + // } if ('' !== $query) { if (0 < count($options->getBusinessFieldsOn())) {