From 45c4c0f74fa3a2fc481c328c46a1c924b00969cb Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 1 Dec 2016 10:48:34 +0800 Subject: [PATCH 01/23] test --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 0ec8d4ee..90d520ca 100644 --- a/README.md +++ b/README.md @@ -153,5 +153,3 @@ http://localhost:9200/_sql/_explain?sql=select * from indexName limit 10 * ES GEO_DISTANCE * ES GEOHASH_GRID aggregation - - From 6f9777d4c1e6aa9c9aacd730c482e4d4b5afae2e Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 1 Dec 2016 11:33:46 +0800 Subject: [PATCH 02/23] Nested sorting is supported --- .../java/org/nlpcn/es4sql/domain/Field.java | 26 +++++++++ .../java/org/nlpcn/es4sql/domain/Order.java | 44 ++++++++++++++ .../java/org/nlpcn/es4sql/domain/Select.java | 12 +++- .../org/nlpcn/es4sql/parse/NestedType.java | 40 ++++++++----- .../org/nlpcn/es4sql/parse/SqlParser.java | 53 +++++++++++------ .../nlpcn/es4sql/query/maker/SortMaker.java | 57 +++++++++++++++++++ 6 files changed, 199 insertions(+), 33 deletions(-) create mode 100644 src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java diff --git a/src/main/java/org/nlpcn/es4sql/domain/Field.java b/src/main/java/org/nlpcn/es4sql/domain/Field.java index 465f2f10..c11f7c54 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Field.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Field.java @@ -16,6 +16,32 @@ public class Field implements Cloneable{ private NestedType nested; private ChildrenType children; + private Where where; + private String mode; + private String sortName; + public Where getWhere() { + return where; + } + + public void setWhere(Where where) { + this.where = where; + } + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public String getSortName() { + return sortName; + } + + public void setSortName(String sortName) { + this.sortName = sortName; + } + public Field(String name, String alias) { this.name = name; this.alias = alias; diff --git a/src/main/java/org/nlpcn/es4sql/domain/Order.java b/src/main/java/org/nlpcn/es4sql/domain/Order.java index de445ccf..98f8976c 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Order.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Order.java @@ -8,12 +8,25 @@ public class Order { private String name; private String type; + private boolean isNested = false; + private String mode; + private String path; + private Where condition; public Order(String name, String type) { this.name = name; this.type = type; } + public Order(boolean isNested, String mode, String path, + Where condition, String name, String type) { + this.isNested = isNested; + this.mode = mode; + this.path = path; + this.condition = condition; + this.name = name; + this.type = type; + } public String getName() { return name; } @@ -30,4 +43,35 @@ public void setType(String type) { this.type = type; } + public boolean isNested() { + return isNested; + } + + public void setNested(boolean nested) { + isNested = nested; + } + + public String getMode() { + return mode; + } + + public void setMode(String mode) { + this.mode = mode; + } + + public String getPath() { + return path; + } + + public void setPath(String path) { + this.path = path; + } + + public Where getCondition() { + return condition; + } + + public void setCondition(Where condition) { + this.condition = condition; + } } diff --git a/src/main/java/org/nlpcn/es4sql/domain/Select.java b/src/main/java/org/nlpcn/es4sql/domain/Select.java index b4db1938..caad4688 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Select.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Select.java @@ -3,7 +3,6 @@ import org.nlpcn.es4sql.domain.hints.Hint; import org.nlpcn.es4sql.parse.SubQueryExpression; -import java.sql.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -78,7 +77,14 @@ public void addOrderBy(String name, String type) { } this.orderBys.add(new Order(name, type)); } - + //TODO new add + public void addOrderBy(boolean isNested, String mode, String path, + Where condition, String name, String type) { + if ("_score".equals(name)) { + isQuery = true; + } + this.orderBys.add(new Order(isNested, mode, path, condition, name, type)); + } public void addField(Field field) { if (field == null ) { @@ -88,7 +94,7 @@ public void addField(Field field) { this.selectAll = true; } - if(field instanceof MethodField && aggsFunctions.contains(field.getName().toUpperCase())) { + if(field instanceof MethodField && aggsFunctions.contains(field.getName().toUpperCase())) { isAgg = true; } diff --git a/src/main/java/org/nlpcn/es4sql/parse/NestedType.java b/src/main/java/org/nlpcn/es4sql/parse/NestedType.java index f1109c48..302f8e87 100644 --- a/src/main/java/org/nlpcn/es4sql/parse/NestedType.java +++ b/src/main/java/org/nlpcn/es4sql/parse/NestedType.java @@ -1,10 +1,7 @@ package org.nlpcn.es4sql.parse; import com.alibaba.druid.sql.ast.SQLExpr; -import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; -import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; -import com.alibaba.druid.sql.ast.expr.SQLPropertyExpr; -import com.alibaba.druid.sql.ast.expr.SQLTextLiteralExpr; +import com.alibaba.druid.sql.ast.expr.*; import org.nlpcn.es4sql.Util; import org.nlpcn.es4sql.domain.Where; import org.nlpcn.es4sql.exception.SqlParseException; @@ -15,9 +12,12 @@ * Created by Eliran on 12/11/2015. */ public class NestedType { + //字段名称 public String field; + //排序时的路径和条件 public String path; public Where where; + public String mode; private boolean reverse; private boolean simple; @@ -30,7 +30,7 @@ public boolean tryFillFromExpr(SQLExpr expr) throws SqlParseException { reverse = methodNameLower.equals("reverse_nested"); List parameters = method.getParameters(); - if (parameters.size() != 2 && parameters.size() != 1) + if (parameters.size() != 3 && parameters.size() != 2 && parameters.size() != 1) throw new SqlParseException("on nested object only allowed 2 parameters (field,path)/(path,conditions..) or 1 parameter (field) "); String field = Util.extendedToString(parameters.get(0)); @@ -52,25 +52,37 @@ public boolean tryFillFromExpr(SQLExpr expr) throws SqlParseException { } } else if (parameters.size() == 2) { - SQLExpr secondParameter = parameters.get(1); - if(secondParameter instanceof SQLTextLiteralExpr || secondParameter instanceof SQLIdentifierExpr || secondParameter instanceof SQLPropertyExpr) { - - String pathString = Util.extendedToString(secondParameter); - if(pathString.equals("")) + SQLExpr lastParameter = parameters.get(1); + if (lastParameter instanceof SQLTextLiteralExpr || lastParameter instanceof SQLIdentifierExpr || lastParameter instanceof SQLPropertyExpr) { + String pathString = Util.extendedToString(lastParameter); + if (pathString.equals("")) this.path = null; else this.path = pathString; this.simple = true; - } - else { + } else { this.path = field; Where where = Where.newInstance(); - new SqlParser().parseWhere(secondParameter,where); - if(where.getWheres().size() == 0) + new SqlParser().parseWhere(lastParameter, where); + if (where.getWheres().size() == 0) throw new SqlParseException("unable to parse filter where."); this.where = where; simple = false; } + } else if (parameters.size() == 3) { + this.path = field; + SQLExpr secondParameter = parameters.get(1); + if (secondParameter instanceof SQLAggregateExpr){ + this.mode = ((SQLAggregateExpr) secondParameter).getMethodName(); + this.field = ((SQLAggregateExpr) secondParameter).getArguments().get(0).toString(); + } + SQLExpr lastParameter = parameters.get(2); + Where where = Where.newInstance(); + new SqlParser().parseWhere(lastParameter, where); + if (where.getWheres().size() == 0) + throw new SqlParseException("unable to parse filter where."); + this.where = where; + simple = false; } return true; diff --git a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java index 60df3739..9996c58b 100644 --- a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java +++ b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java @@ -1,15 +1,13 @@ package org.nlpcn.es4sql.parse; -import java.util.*; - +import com.alibaba.druid.sql.ast.SQLCommentHint; +import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.SQLOrderBy; +import com.alibaba.druid.sql.ast.SQLOrderingSpecification; import com.alibaba.druid.sql.ast.expr.*; import com.alibaba.druid.sql.ast.statement.*; -import com.alibaba.druid.sql.ast.*; import com.alibaba.druid.sql.dialect.mysql.ast.expr.MySqlSelectGroupByExpr; import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; - - -import org.nlpcn.es4sql.Util; import org.nlpcn.es4sql.domain.*; import org.nlpcn.es4sql.domain.Where.CONN; import org.nlpcn.es4sql.domain.hints.Hint; @@ -17,6 +15,11 @@ import org.nlpcn.es4sql.exception.SqlParseException; import org.nlpcn.es4sql.spatial.SpatialParamsFactory; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + /** * es sql support * @@ -51,7 +54,8 @@ private Select parseSelect(MySqlSelectQueryBlock query) throws SqlParseException select.getHints().addAll(parseHints(query.getHints())); findLimit(query.getLimit(), select); - + //TODO 修改 + //select.setOrderBys(findOrderBy(query.getOrderBy())); findOrderBy(query, select); findGroupBy(query, select); @@ -150,7 +154,7 @@ private void explanCond(String opear, SQLExpr expr, Where where) throws SqlParse if(Condition.OPEAR.methodNameToOpear.containsKey(methodName)){ Object[] methodParametersValue = getMethodValuesWithSubQueries(method); - Condition condition = null; + Condition condition = null; if(isNested) condition = new Condition(CONN.valueOf(opear) ,soExpr.getLeft().toString(), Condition.OPEAR.methodNameToOpear.get(methodName),methodParametersValue, nestedType); @@ -197,7 +201,7 @@ else if(isChildren) isChildren = true; } - Condition condition = null; + Condition condition = null; if(isNested) condition = new Condition(CONN.valueOf(opear), leftSide, siExpr.isNot() ? "NOT IN" : "IN", parseValue(siExpr.getTargetList()), nestedType); @@ -228,7 +232,7 @@ else if(isChildren) isChildren = true; } - Condition condition = null; + Condition condition = null; if(isNested) condition = new Condition(CONN.valueOf(opear), leftSide, between.isNot() ? "NOT BETWEEN" : "BETWEEN", new Object[]{parseValue(between.beginExpr), parseValue(between.endExpr)}, nestedType); @@ -340,7 +344,7 @@ else if (methodName.toLowerCase().equals("script")){ isChildren = true; } - Condition condition = null; + Condition condition = null; if(isNested) condition = new Condition(CONN.valueOf(opear), leftSide, sqlIn.isNot() ? "NOT IN" : "IN", subQueryExpression, nestedType); @@ -402,7 +406,7 @@ private Object parseValue(SQLExpr expr) throws SqlParseException { } - private void findSelect(MySqlSelectQueryBlock query, Select select,String tableAlias) throws SqlParseException { + private void findSelect(MySqlSelectQueryBlock query, Select select, String tableAlias) throws SqlParseException { List selectList = query.getSelectList(); for (SQLSelectItem sqlSelectItem : selectList) { Field field = FieldMaker.makeField(sqlSelectItem.getExpr(), sqlSelectItem.getAlias(),tableAlias); @@ -486,6 +490,14 @@ private String sameAliasWhere(Where where, String... aliases) throws SqlParseExc return firstAlias; } + private List findOrderBy(SQLOrderBy orderBy){ + List items = orderBy.getItems(); + for (SQLSelectOrderByItem sqlSelectOrderByItem : items) { + SQLExpr expr = sqlSelectOrderByItem.getExpr(); + } + return new ArrayList(); + } + private void findOrderBy(MySqlSelectQueryBlock query, Select select) throws SqlParseException { SQLOrderBy orderBy = query.getOrderBy(); @@ -501,8 +513,9 @@ private void findOrderBy(MySqlSelectQueryBlock query, Select select) throws SqlP private void addOrderByToSelect(Select select, List items, String alias) throws SqlParseException { for (SQLSelectOrderByItem sqlSelectOrderByItem : items) { SQLExpr expr = sqlSelectOrderByItem.getExpr(); - String orderByName = FieldMaker.makeField(expr, null, null).toString(); - + Field field = FieldMaker.makeField(expr, null, null); + String orderByName = field.getName(); + //String orderByName = FieldMaker.makeField(expr, null, null).toString(); if (sqlSelectOrderByItem.getType() == null) { sqlSelectOrderByItem.setType(SQLOrderingSpecification.ASC); } @@ -510,8 +523,16 @@ private void addOrderByToSelect(Select select, List items, orderByName = orderByName.replace("`", ""); if(alias!=null) orderByName = orderByName.replaceFirst(alias+"\\.",""); - select.addOrderBy(orderByName, type); - + if (field.isNested()) { + String path = field.getNestedPath(); + String mode = field.getMode(); + String sortName = field.getSortName(); + Where condition = null; + select.addOrderBy(true, mode, path, condition, sortName, type); + } else { + select.addOrderBy(orderByName, type); + } + //select.addOrderBy(orderByName, type); } } diff --git a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java new file mode 100644 index 00000000..0a2d5728 --- /dev/null +++ b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java @@ -0,0 +1,57 @@ +package org.nlpcn.es4sql.query.maker; + +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.search.sort.SortBuilder; +import org.elasticsearch.search.sort.SortBuilders; +import org.elasticsearch.search.sort.SortOrder; +import org.nlpcn.es4sql.domain.Condition; +import org.nlpcn.es4sql.domain.Order; +import org.nlpcn.es4sql.domain.Where; + +import java.util.List; + +/** + * Created by fangbb on 2016-11-22. + */ +public class SortMaker extends Maker { + public SortMaker() { + super(true); + } + + public static SortBuilder explan(Order order) { + String flag = "_last"; + flag = order.getType().equals("DESC") ? "_last" : "_first"; + Where where = order.getCondition(); + String filedName = order.getName(); + String path = order.getPath(); + String mode = order.getMode() == null ? "sum" : order.getMode(); + String type = order.getType(); + String key = ""; + String val = ""; + BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); + List conditions = where.getWheres(); + for (Where con : conditions) { + key = ((Condition) con).getName(); + val = ((Condition) con).getValue().toString(); + queryBuilder = explanSort(queryBuilder,key,val); + } + + SortBuilder sort = SortBuilders + .fieldSort(filedName) + .setNestedFilter(queryBuilder) + .setNestedPath(path) + .sortMode(mode) + .order(SortOrder.valueOf(type)) + .missing(flag); + return sort; + } + + public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String key, String value){ + QueryBuilder termQueryBuilder = QueryBuilders.termQuery(key, value); + queryBuilder = queryBuilder.should(termQueryBuilder); + return queryBuilder; + } + +} From 405cb99e75bc8bdd0202ff80c26381f9bc0bd461 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Sun, 4 Dec 2016 14:42:12 +0800 Subject: [PATCH 03/23] fix bug --- .../org/nlpcn/es4sql/parse/FieldMaker.java | 5 ++ .../org/nlpcn/es4sql/parse/SqlParser.java | 2 +- .../es4sql/query/DefaultQueryAction.java | 11 ++- .../nlpcn/es4sql/query/ESActionFactory.java | 70 +++++++++---------- 4 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/main/java/org/nlpcn/es4sql/parse/FieldMaker.java b/src/main/java/org/nlpcn/es4sql/parse/FieldMaker.java index eb46060c..d56c6b6b 100644 --- a/src/main/java/org/nlpcn/es4sql/parse/FieldMaker.java +++ b/src/main/java/org/nlpcn/es4sql/parse/FieldMaker.java @@ -91,6 +91,11 @@ private static Field handleIdentifier(NestedType nestedType, String alias, Strin Field field = handleIdentifier(new SQLIdentifierExpr(nestedType.field), alias, tableAlias); field.setNested(nestedType); field.setChildren(null); + if (nestedType.mode != null) { + field.setWhere(nestedType.where); + field.setMode(nestedType.mode); + field.setSortName(nestedType.field); + } return field; } diff --git a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java index 9996c58b..e2bbf403 100644 --- a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java +++ b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java @@ -527,7 +527,7 @@ private void addOrderByToSelect(Select select, List items, String path = field.getNestedPath(); String mode = field.getMode(); String sortName = field.getSortName(); - Where condition = null; + Where condition = field.getWhere(); select.addOrderBy(true, mode, path, condition, sortName, type); } else { select.addOrderBy(orderByName, type); diff --git a/src/main/java/org/nlpcn/es4sql/query/DefaultQueryAction.java b/src/main/java/org/nlpcn/es4sql/query/DefaultQueryAction.java index 19310d0c..3c96e9e0 100644 --- a/src/main/java/org/nlpcn/es4sql/query/DefaultQueryAction.java +++ b/src/main/java/org/nlpcn/es4sql/query/DefaultQueryAction.java @@ -10,12 +10,14 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; +import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.nlpcn.es4sql.domain.*; import org.nlpcn.es4sql.domain.hints.Hint; import org.nlpcn.es4sql.domain.hints.HintType; import org.nlpcn.es4sql.exception.SqlParseException; import org.nlpcn.es4sql.query.maker.QueryMaker; +import org.nlpcn.es4sql.query.maker.SortMaker; /** * Transform SQL query to standard Elasticsearch search query @@ -149,8 +151,15 @@ private void setWhere(Where where) throws SqlParseException { * list of Order object */ private void setSorts(List orderBys) { + //TODO 需要修改 for (Order order : orderBys) { - request.addSort(order.getName(), SortOrder.valueOf(order.getType())); + boolean isNested = order.isNested(); + if (isNested) { + SortBuilder sort = SortMaker.explan(order); + request.addSort(sort); + } else { + request.addSort(order.getName(), SortOrder.valueOf(order.getType())); + } } } diff --git a/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java b/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java index 95cda676..3618951f 100644 --- a/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java +++ b/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java @@ -29,69 +29,66 @@ public class ESActionFactory { - /** - * Create the compatible Query object - * based on the SQL query. - * - * @param sql The SQL query. - * @return Query object. - */ - public static QueryAction create(Client client, String sql) throws SqlParseException, SQLFeatureNotSupportedException { - sql = sql.replaceAll("\n"," "); + /** + * Create the compatible Query object + * based on the SQL query. + * + * @param sql The SQL query. + * @return Query object. + */ + public static QueryAction create(Client client, String sql) throws SqlParseException, SQLFeatureNotSupportedException { + sql = sql.replaceAll("\n", " "); String firstWord = sql.substring(0, sql.indexOf(' ')); switch (firstWord.toUpperCase()) { - case "SELECT": - SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(sql); - if(isJoin(sqlExpr,sql)){ + case "SELECT": + SQLQueryExpr sqlExpr = (SQLQueryExpr) toSqlExpr(sql); + if (isJoin(sqlExpr, sql)) { JoinSelect joinSelect = new SqlParser().parseJoinSelect(sqlExpr); handleSubQueries(client, joinSelect.getFirstTable()); handleSubQueries(client, joinSelect.getSecondTable()); return ESJoinQueryActionFactory.createJoinAction(client, joinSelect); - } - else { + } else { Select select = new SqlParser().parseSelect(sqlExpr); handleSubQueries(client, select); return handleSelect(client, select); } - case "DELETE": + case "DELETE": SQLStatementParser parser = createSqlStatementParser(sql); - SQLDeleteStatement deleteStatement = parser.parseDeleteStatement(); - Delete delete = new SqlParser().parseDelete(deleteStatement); - return new DeleteQueryAction(client, delete); + SQLDeleteStatement deleteStatement = parser.parseDeleteStatement(); + Delete delete = new SqlParser().parseDelete(deleteStatement); + return new DeleteQueryAction(client, delete); case "SHOW": - return new ShowQueryAction(client,sql); - default: - throw new SQLFeatureNotSupportedException(String.format("Unsupported query: %s", sql)); - } - } + return new ShowQueryAction(client, sql); + default: + throw new SQLFeatureNotSupportedException(String.format("Unsupported query: %s", sql)); + } + } private static void handleSubQueries(Client client, Select select) throws SqlParseException { - if (select.containsSubQueries()) - { - for(SubQueryExpression subQueryExpression : select.getSubQueries()){ + if (select.containsSubQueries()) { + for (SubQueryExpression subQueryExpression : select.getSubQueries()) { QueryAction queryAction = handleSelect(client, subQueryExpression.getSelect()); - executeAndFillSubQuery(client , subQueryExpression,queryAction); + executeAndFillSubQuery(client, subQueryExpression, queryAction); } } } - private static void executeAndFillSubQuery(Client client , SubQueryExpression subQueryExpression,QueryAction queryAction) throws SqlParseException { + private static void executeAndFillSubQuery(Client client, SubQueryExpression subQueryExpression, QueryAction queryAction) throws SqlParseException { List values = new ArrayList<>(); Object queryResult; try { - queryResult = QueryActionElasticExecutor.executeAnyAction(client,queryAction); + queryResult = QueryActionElasticExecutor.executeAnyAction(client, queryAction); } catch (Exception e) { - throw new SqlParseException("could not execute SubQuery: " + e.getMessage()); + throw new SqlParseException("could not execute SubQuery: " + e.getMessage()); } String returnField = subQueryExpression.getReturnField(); - if(queryResult instanceof SearchHits) { + if (queryResult instanceof SearchHits) { SearchHits hits = (SearchHits) queryResult; for (SearchHit hit : hits) { - values.add(ElasticResultHandler.getFieldValue(hit,returnField)); + values.add(ElasticResultHandler.getFieldValue(hit, returnField)); } - } - else { + } else { throw new SqlParseException("on sub queries only support queries that return Hits and not aggregations"); } subQueryExpression.setValues(values.toArray()); @@ -111,9 +108,9 @@ private static SQLStatementParser createSqlStatementParser(String sql) { return new MySqlStatementParser(lexer); } - private static boolean isJoin(SQLQueryExpr sqlExpr,String sql) { + private static boolean isJoin(SQLQueryExpr sqlExpr, String sql) { MySqlSelectQueryBlock query = (MySqlSelectQueryBlock) sqlExpr.getSubQuery().getQuery(); - return query.getFrom() instanceof SQLJoinTableSource && sql.toLowerCase().contains("join"); + return query.getFrom() instanceof SQLJoinTableSource && sql.toLowerCase().contains("join"); } private static SQLExpr toSqlExpr(String sql) { @@ -128,5 +125,4 @@ private static SQLExpr toSqlExpr(String sql) { } - } From 338c13beed67662bd4563709533c9e289449e967 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 9 Dec 2016 12:38:22 +0800 Subject: [PATCH 04/23] add seg() --- .../plugin/nlpcn/RestSqlAction.java | 13 +- .../elasticsearch/plugin/nlpcn/SqlPlug.java | 16 + .../nlpcn/preAnalyzer/AnsjAnalyzer.java | 16 + .../preAnalyzer/AnsjElasticConfigurator.java | 34 ++ .../plugin/nlpcn/preAnalyzer/Method.java | 66 ++++ .../plugin/nlpcn/preAnalyzer/SqlAnalyzer.java | 146 +++++++++ .../nlpcn/preAnalyzer/SqlParseAnalyzer.java | 286 +++++++++++++++++ .../plugin/nlpcn/preAnalyzer/SqlSegment.java | 172 ++++++++++ .../plugin/nlpcn/request/HttpRequester.java | 299 ++++++++++++++++++ .../plugin/nlpcn/request/HttpResponse.java | 124 ++++++++ .../java/org/nlpcn/es4sql/domain/Query.java | 4 + .../java/org/nlpcn/es4sql/domain/Select.java | 5 + src/test/java/org/nlpcn/es4sql/TestSql.java | 123 +++++++ 13 files changed, 1302 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Method.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpRequester.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpResponse.java create mode 100644 src/test/java/org/nlpcn/es4sql/TestSql.java diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java index e5d5fe28..1d00a793 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java @@ -5,10 +5,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugin.nlpcn.executors.ActionRequestRestExecuterFactory; import org.elasticsearch.plugin.nlpcn.executors.RestExecutor; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlSegment; import org.elasticsearch.rest.*; import org.nlpcn.es4sql.SearchDao; import org.nlpcn.es4sql.query.QueryAction; -import org.nlpcn.es4sql.query.SqlElasticRequestBuilder; import java.util.Map; @@ -22,6 +24,7 @@ public RestSqlAction(Settings settings, Client client, RestController restContro restController.registerHandler(RestRequest.Method.GET, "/_sql/_explain", this); restController.registerHandler(RestRequest.Method.POST, "/_sql", this); restController.registerHandler(RestRequest.Method.GET, "/_sql", this); + restController.registerHandler(RestRequest.Method.GET, "/_sql/_seg", this); } @Override @@ -31,11 +34,17 @@ protected void handleRequest(RestRequest request, RestChannel channel, final Cli if (sql == null) { sql = request.content().toUtf8(); } + //ananlyze + //sql = SqlAnalyzer.seg(sql); + sql = SqlParseAnalyzer.seg(sql); SearchDao searchDao = new SearchDao(client); QueryAction queryAction= searchDao.explain(sql); // TODO add unittests to explain. (rest level?) - if (request.path().endsWith("/_explain")) { + if(request.path().endsWith("_seg")){ + BytesRestResponse bytesRestResponse = new BytesRestResponse(RestStatus.OK, sql); + channel.sendResponse(bytesRestResponse); + } else if (request.path().endsWith("/_explain")) { String jsonExplanation = queryAction.explain().explain(); BytesRestResponse bytesRestResponse = new BytesRestResponse(RestStatus.OK, jsonExplanation); channel.sendResponse(bytesRestResponse); diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java index ad1fb096..fbc9f41d 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java @@ -1,9 +1,15 @@ package org.elasticsearch.plugin.nlpcn; +import org.elasticsearch.common.inject.AbstractModule; +import org.elasticsearch.common.inject.Module; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestModule; +import java.util.Collection; +import java.util.Collections; + public class SqlPlug extends Plugin { public SqlPlug() { @@ -18,6 +24,16 @@ public String name() { public String description() { return "Use sql to query elasticsearch."; } + @Override + public Collection nodeModules() { + return Collections. singletonList(new AnsjModule()); + } + public static class AnsjModule extends AbstractModule { + @Override + protected void configure() { + bind(AnsjAnalyzer.class).asEagerSingleton(); + } + } public void onModule(RestModule module) { diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java new file mode 100644 index 00000000..a43e84c8 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java @@ -0,0 +1,16 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; + +/** + * Created by fangbb on 2016-12-6. + */ +public class AnsjAnalyzer extends AbstractComponent { + @Inject + public AnsjAnalyzer(final Settings settings){ + super(settings); + AnsjElasticConfigurator.init(settings); + } +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java new file mode 100644 index 00000000..2bb6b821 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java @@ -0,0 +1,34 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + + +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; + +/** + * Created by fangbb on 2016-11-25. + */ +public class AnsjElasticConfigurator { + public static ESLogger logger = Loggers.getLogger("sql-init"); + public static Environment environment; + public static String ES_IP = ""; + public static String ES_PORT = ""; + + public static void init(Settings settings) { + try { + ES_IP = settings.get("network.host"); + ES_PORT = settings.get("http.port"); + logger.info("es 本地IP:" + ES_IP); + logger.info("es 端口:"+ ES_PORT); + if (ES_IP==null || ES_IP.equals("")){ + logger.error("本地IP获取失败"); + } + if (ES_PORT==null || ES_PORT.equals("")){ + logger.error("ES服务获取失败"); + } + } catch (Exception e) { + logger.error("本地IP和ES端口获取失败"); + } + } +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Method.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Method.java new file mode 100644 index 00000000..fd3ca88d --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Method.java @@ -0,0 +1,66 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +/** + * Created by fangbb on 2016-12-8. + */ +public class Method { + String parentMethod; + String childenMethod; + String params; + + public Method(String parentMethod, String childenMethod, String params) { + this.parentMethod = parentMethod; + this.childenMethod = childenMethod; + this.params = params; + } + + public Method() { + } + + public String getParentMethod() { + return parentMethod; + } + + public void setParentMethod(String parentMethod) { + this.parentMethod = parentMethod; + } + + public String getChildenMethod() { + return childenMethod; + } + + public void setChildenMethod(String childenMethod) throws Exception{ + if (this.parentMethod.equals("seg") && childenMethod != null) { + throw new Exception("seg("+childenMethod+"()) is erro"); + } else { + this.childenMethod = childenMethod; + } + + } + + public String getParams() { + return params; + } + + public void setParams(String params) { + this.params = params; + } + + public String getFunName() { + String name = ""; + if (this.parentMethod != null && !this.parentMethod.equals("seg")) { + name = this.parentMethod; + } else if (this.childenMethod != null && !this.childenMethod.equals("seg")) { + name = this.childenMethod; + } + return name; + } + + public boolean containSeg() { + if (this.parentMethod != null && this.parentMethod.equals("seg") + ||this.childenMethod!=null && this.childenMethod.equals("seg")) { + return true; + } + return false; + } +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java new file mode 100644 index 00000000..6461f8d9 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java @@ -0,0 +1,146 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import com.alibaba.druid.sql.SQLUtils; +import org.elasticsearch.plugin.nlpcn.request.HttpRequester; +import org.elasticsearch.plugin.nlpcn.request.HttpResponse; + +import java.net.URLEncoder; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by fangbb on 2016-11-25. + */ +public class SqlAnalyzer { + public static String seg(String sql) { + sql = SQLUtils.formatMySql(sql); + if (sql.contains("seg(")) { + String pattern = " (.*?) = (.*?)seg\\((.*?)\\)"; + Pattern p = Pattern.compile(pattern); + Matcher m = p.matcher(sql); + String pAll = ""; + String pName = ""; + String pFun = ""; + String pTerm = ""; + while (m.find()) { + pAll = m.group(); + pName = parseName(m.group(1)); + pFun = parseFun(m.group(2)); + pTerm = m.group(3); + if (pFun == null || pFun.equals("")) { + sql = replaceSql(sql, pAll, pName, pTerm); + } else { + sql = replaceSql(sql, pAll, pName, pFun, pTerm); + } + } + } + sql = SQLUtils.formatMySql(sql); + return sql.replace(" - ","-"); + } + + public static String replaceSql(String sql, String all, String name, String terms) { + String[] termsArr = analyzer(terms); + int size = termsArr.length - 1; + String newAll = ""; + StringBuffer conBuffer = new StringBuffer(); + //String source = ""; + String source = name + " = " + "seg(" + terms + ")"; + for (int i = 0; i <= size; i++) { + if (i == 0) { + conBuffer.append(name).append("=").append(termsArr[i]); + } else { + conBuffer.append(" and ").append(name).append("=").append(termsArr[i]); + } + } + newAll = all.replace(source, conBuffer.toString()); + sql = sql.replace(all, newAll); + return sql; + } + + public static String replaceSql(String sql, String all, String name, String fun, String terms) { + String[] termsArr = analyzer(terms); + int indexMax = termsArr.length - 1; + String newAll = ""; + StringBuffer conBuffer = new StringBuffer(); + String source = ""; + source = name + " = " + fun + "(" + "seg(" + terms + ")"; + for (int i = 0; i <= indexMax; i++) { + if (i == 0) { //第一个term + if (indexMax == 0) { //共一个term + conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]); + } else { + conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); + } + } else if (i == indexMax) { //多个term时,最后一个 + conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]); + } else { //中间的term + conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); + } + } + newAll = all.replace(source, conBuffer.toString()); + sql = sql.replace(all, newAll); + return sql; + } + + + public static String parseName(String name) { + String[] tmp = name.split(",| "); + int num = tmp.length; + name = tmp[num - 1]; + return name; + } + + public static String parseFun(String fun) { + if (fun == null || fun.equals("")) { + fun = ""; + } else { + fun = fun.replace("(", ""); + } + return fun; + } + + public static String[] analyzer(String term) { + //TODO done + term = term.replaceAll("'", ""); + HttpRequester request = new HttpRequester(); + HttpResponse response = null; + String sourceTerms = ""; +// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj + try { + String ip = AnsjElasticConfigurator.ES_IP; + String port = AnsjElasticConfigurator.ES_PORT; + String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; + String preUrl = "http://" + ip + ":" + port + midUrl; + //preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + String enTerm = URLEncoder.encode(term, "UTF-8"); + String url = preUrl + enTerm; + response = request.sendGet(url); + if (response.getCode() == 200) { + if (response != null && response.getContent().length() > 10) { + sourceTerms = response.getContent(); + } + } + + } catch (Exception e) { + e.printStackTrace(); + } + return getTerms(sourceTerms).split(","); + } + + private static String getTerms(String sourceTerms) { + StringBuffer sb = new StringBuffer(); + String[] lines = sourceTerms.split("\n"); + int lineLen = lines.length; + for (int i = 0; i < lineLen; i++) { + String[] terms = lines[i].split("\t"); + String term = terms[0].trim(); + if (i == 0) { + sb.append("\"").append(term).append("\""); + } else { + sb.append(",\"").append(term).append("\""); + } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java new file mode 100644 index 00000000..612cb4d5 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -0,0 +1,286 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.SQLOrderBy; +import com.alibaba.druid.sql.ast.expr.*; +import com.alibaba.druid.sql.ast.statement.SQLSelect; +import com.alibaba.druid.sql.ast.statement.SQLSelectOrderByItem; +import com.alibaba.druid.sql.ast.statement.SQLSelectQuery; +import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; +import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlOutputVisitor; +import com.alibaba.druid.util.JdbcConstants; +import org.elasticsearch.plugin.nlpcn.request.HttpRequester; +import org.elasticsearch.plugin.nlpcn.request.HttpResponse; +import org.nlpcn.es4sql.parse.ElasticLexer; +import org.nlpcn.es4sql.parse.ElasticSqlExprParser; + +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.List; + + +/** + * Created by fangbb on 2016-12-6. + */ +public class SqlParseAnalyzer { + public static String dbType = JdbcConstants.MYSQL; + + public static String seg(String sql) throws Exception { + if (sql.contains("seg(")) { + MySqlSelectQueryBlock query = getQueryBlock(sql); + parseWhere(query.getWhere()); + parseOrderBys(query.getOrderBy()); + sql = printSql(query); + } + return sql; + } + + private static MySqlSelectQueryBlock getQueryBlock(String sql) { + ElasticLexer lexer = new ElasticLexer(sql); + lexer.nextToken(); + ElasticSqlExprParser elasticSqlExprParser = new ElasticSqlExprParser(lexer); + SQLExpr expr = elasticSqlExprParser.expr(); + SQLQueryExpr sqlExpr = (SQLQueryExpr) expr; + SQLSelect sqlSelect = sqlExpr.getSubQuery(); + //获取SQLSelectQuery + SQLSelectQuery sqlSelectQuery = sqlSelect.getQuery(); + MySqlSelectQueryBlock query = (MySqlSelectQueryBlock) sqlSelectQuery; + return query; + } + + private static void parseWhere(SQLExpr where) throws Exception { + if (where == null) { + return; + } + preTraverse(where); + } + + private static void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { + if (sqlOrderBy == null) { + return; + } + List items = sqlOrderBy.getItems(); + for (SQLSelectOrderByItem item : items) { + SQLExpr sqlExpr = item.getExpr(); + if (sqlExpr instanceof SQLMethodInvokeExpr) { + String nested = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); + List params = ((SQLMethodInvokeExpr) sqlExpr).getParameters(); + SQLBinaryOpExpr condition; + if (nested.equals("nested") && params.size() == 3) { + parseWhere((SQLBinaryOpExpr) params.get(2)); + //params.add(2, condition); + } else { + new Exception("Nested sorting must be 3 parameters"); + } + } + } + } + + //先序遍历获取叶子节点 + private static void preTraverse(SQLExpr sqlExpr) throws Exception { + if (sqlExpr instanceof SQLBinaryOpExpr) { + SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; + SQLExpr left = sqlBinaryOpExpr.getLeft(); + SQLExpr right = sqlBinaryOpExpr.getRight(); + if (isLeaf(left) && isLeaf(right)) { + //TODO + replaceLeafNode(sqlBinaryOpExpr); + return; + } else { + preTraverse(left); + preTraverse(right); + } + } else if (sqlExpr instanceof SQLMethodInvokeExpr) { + String nested = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); + List params = ((SQLMethodInvokeExpr) sqlExpr).getParameters(); + SQLBinaryOpExpr condition; + if (nested.equals("nested") && params.size() == 2) { + parseWhere((SQLBinaryOpExpr) params.get(1)); + } else { + new Exception("Nested where must be 2 parameters"); + } + } + } + + private static boolean isLeaf(SQLExpr sqlExpr) { + if (sqlExpr instanceof SQLBinaryOpExpr) { + return false; + } + return true; + } + + private static Method parseMethod(SQLExpr right) throws Exception { + Method retMethod = new Method(); + if (right instanceof SQLMethodInvokeExpr) { + SQLMethodInvokeExpr methodInvokeExpr = ((SQLMethodInvokeExpr) right); + String methodName = methodInvokeExpr.getMethodName(); + retMethod.setParentMethod(methodName); + List childMethod = methodInvokeExpr.getParameters(); + SQLExpr sqlExpr = childMethod.get(0); + if (sqlExpr instanceof SQLMethodInvokeExpr) { + retMethod.setChildenMethod(((SQLMethodInvokeExpr) sqlExpr).getMethodName()); + SQLExpr nSqlExpr = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(0); + if (nSqlExpr instanceof SQLCharExpr) { + retMethod.setParams(((SQLCharExpr) nSqlExpr).getText()); + } else if (nSqlExpr instanceof SQLIdentifierExpr) { + if (retMethod.getChildenMethod().equals("seg")) { + SQLIdentifierExpr identifierExpr = (SQLIdentifierExpr) nSqlExpr; + retMethod.setParams(identifierExpr.getName()); + childMethod.clear(); + childMethod.add(0, identifierExpr); + retMethod.setChildenMethod(null); + } + } + } else if (sqlExpr instanceof SQLCharExpr) { + retMethod.setParams(((SQLCharExpr) sqlExpr).getText()); + } else if (sqlExpr instanceof SQLIdentifierExpr) { + retMethod.setParams(((SQLIdentifierExpr) sqlExpr).getName()); + } + } + return retMethod; + } + + private static boolean segNoQuota(SQLMethodInvokeExpr methodInvokeExpr) { + List params = methodInvokeExpr.getParameters(); + if (params.get(0) instanceof SQLIdentifierExpr) { + return true; + } + return false; + } + + private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr,SQLExpr right,Method method){ + //当seg内没有引号时,去掉seg() + if (segNoQuota((SQLMethodInvokeExpr) right) && method.containSeg()) { + method.setParentMethod(null); + List params = ((SQLMethodInvokeExpr) right).getParameters(); + binaryOpExpr.setRight(params.get(0)); + } + } + + //对叶节点分词,构造新节点 + private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Exception { + SQLExpr left = binaryOpExpr.getLeft(); + SQLExpr right = binaryOpExpr.getRight(); + SQLBinaryOperator operator = binaryOpExpr.getOperator(); + String filed = ((SQLIdentifierExpr) left).getName(); + if (right instanceof SQLMethodInvokeExpr) { + Method method = parseMethod(right); + String sourceTerm = method.getParams(); + removeSegFun(binaryOpExpr,right,method); + //seg(term("abc")) exception + if (method.containSeg() && sourceTerm != null) { + String[] terms = analyzer(sourceTerm); + //String[] terms = "a,b".split(","); + String funName = method.getFunName(); + List allNewNode = new ArrayList(); + for (String term : terms) { + if (!funName.equals("")) { + SQLMethodInvokeExpr methodInvokeExpr = new SQLMethodInvokeExpr(); + methodInvokeExpr.setMethodName(funName); + methodInvokeExpr.addParameter(new SQLCharExpr(term)); + allNewNode.add(createNode(filed, methodInvokeExpr, operator)); + } else { + SQLCharExpr charExpr = new SQLCharExpr(); + charExpr.setText(term); + allNewNode.add(createNode(filed, charExpr, operator)); + } + + } + conTree(binaryOpExpr, allNewNode); + } + } + } + + //构造新的二叉树替换原有节点 + private static void conTree(SQLBinaryOpExpr retExpr, List SQLBinaryOpNode) { + int size = SQLBinaryOpNode.size(); + int andNum = size - 1; + List allNode = new ArrayList(); + if (andNum == 0) { + retExpr.setRight(SQLBinaryOpNode.get(0).getRight()); + }else { + for (int i = 0; i < andNum; i++) { + if (i == 0) { + retExpr.setOperator(SQLBinaryOperator.BooleanAnd); + allNode.add(retExpr); + } else { + allNode.add(createNode(null, null, SQLBinaryOperator.BooleanAnd)); + } + } + allNode.addAll(SQLBinaryOpNode); + //共有n-1个And,n个node,每一个节点从0开始进行编号,那么第i个节点的左孩子的编号为2*i+1,右孩子为2*i+2。 + for (int parentIndex = 0; parentIndex < andNum; parentIndex++) { + allNode.get(parentIndex).setLeft(allNode.get(parentIndex * 2 + 1)); + allNode.get(parentIndex).setRight(allNode.get(parentIndex * 2 + 2)); + } + } + } + + + //TODO 构造一个节点 + private static SQLBinaryOpExpr createNode(String filed, SQLExpr value, SQLBinaryOperator operator) { + SQLBinaryOpExpr retWhere = new SQLBinaryOpExpr(); + SQLIdentifierExpr ileft = new SQLIdentifierExpr(); + ileft.setName(filed); + retWhere.setLeft(ileft); + retWhere.setOperator(operator); + retWhere.setRight(value); + return retWhere; + } + + private static String printSql(MySqlSelectQueryBlock query) { + StringBuilder out = new StringBuilder(); + MySqlOutputVisitor visitor = new MySqlOutputVisitor(out); + query.accept0(visitor); + return out.toString(); + } + + public static String[] analyzer(String term) { + //TODO done + HttpRequester request = new HttpRequester(); + HttpResponse response = null; + String sourceTerms = ""; +// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj + try { + //String ip = InetAddress.getLocalHost().getHostAddress(); + String ip = AnsjElasticConfigurator.ES_IP; + String port = AnsjElasticConfigurator.ES_PORT; + String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; + String preUrl = "http://" + ip + ":" + port + midUrl; + //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + System.out.println(preUrl); + String enTerm = URLEncoder.encode(term, "UTF-8"); + String url = preUrl + enTerm; + System.out.println(url); + response = request.sendGet(url); + if (response.getCode() == 200) { + if (response != null && response.getContent().length() > 10) { + sourceTerms = response.getContent(); + } + } + + } catch (Exception e) { + e.printStackTrace(); + } + return getTerms(sourceTerms).split(","); + } + + private static String getTerms(String sourceTerms) { + StringBuffer sb = new StringBuffer(); + String[] lines = sourceTerms.split("\n"); + int lineLen = lines.length; + for (int i = 0; i < lineLen; i++) { + String[] terms = lines[i].split("\t"); + String term = terms[0].trim(); + int size = terms.length; + if (i == 0) { + //sb.append("\"").append(term).append("\""); + sb.append(term); + } else { + sb.append(",").append(term); + } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java new file mode 100644 index 00000000..0b0a4113 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java @@ -0,0 +1,172 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import com.alibaba.druid.sql.SQLUtils; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjElasticConfigurator; +import org.elasticsearch.plugin.nlpcn.request.HttpRequester; +import org.elasticsearch.plugin.nlpcn.request.HttpResponse; + +import java.net.URLEncoder; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Created by fangbb on 2016-11-25. + */ +public class SqlSegment { + public static String seg(String sql) { + if (sql.contains("seg(")){ + String pattern = "( .*?)=(.*?)seg\\((.*?)\\)"; + Pattern p = Pattern.compile(pattern); + Matcher m = p.matcher(sql); + String pAll = ""; + String pName = ""; + String pFun = ""; + String pTerm = ""; + //System.out.println(sql); + while (m.find()) { + pAll = m.group(); + pName = m.group(1); + pFun = m.group(2); + pTerm = m.group(3); + System.out.println("========"); + System.out.println(pAll); + if (pFun.contains("=")) { + String pNew = pFun + "seg(" + pTerm + ")"; + // System.out.println(pNew); + Matcher pm = p.matcher(pNew); + pm.find(); + pAll = pm.group(); + pName = pm.group(1); + pFun = pm.group(2); + pTerm = pm.group(3); + } + pName = parseName(pName); + pFun = parseFun(pFun); + if (pFun == null || pFun.equals("")) { + sql = replaceSql(sql, pAll, pName, pTerm); + } else { + sql = replaceSql(sql, pAll, pName, pFun, pTerm); + } + } + } + return sql; + } + + public static String replaceSql(String sql, String all, String name, String terms) { + String[] termsArr = analyzer(terms); + int size = termsArr.length - 1; + String newAll = ""; + StringBuffer conBuffer = new StringBuffer(); + //String source = ""; + String source = name + "=" + "seg(" + terms + ")"; + for (int i = 0; i <= size; i++) { + if (i == 0) { + conBuffer.append(name).append("=").append(termsArr[i]); + } else { + conBuffer.append(" and ").append(name).append("=").append(termsArr[i]); + } + } + newAll = all.replace(source, conBuffer.toString()); + sql = sql.replace(all, newAll); + return sql; + } + + public static String replaceSql(String sql, String all, String name, String fun, String terms) { + String[] termsArr = analyzer(terms); + int indexMax = termsArr.length - 1; + String newAll = ""; + //String conditions = ""; + StringBuffer conBuffer = new StringBuffer(); + String source = ""; + source = name + "=" + fun + "(" + "seg(" + terms + ")"; + for (int i = 0; i <= indexMax; i++) { + if (i == 0) { //第一个term + if (indexMax == 0) { //共一个term + conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]); + } else { + conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); + } + } else if (i == indexMax) { //多个term时,最后一个 + conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]); + } else { //中间的term + conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); + } + } + newAll = all.replace(source, conBuffer.toString()); + sql = sql.replace(all, newAll); + return sql; + } + + + public static String parseName(String name) { + String[] tmp = name.split(",| "); + int num = tmp.length; + name = tmp[num - 1]; + return name; + } + + public static String parseFun(String fun) { + if (fun == null || fun.equals("")) { + fun = ""; + } else { + fun = fun.replace("(", ""); + } + return fun; + } + + public static String parseTerm(String term) { + if (term != null || term.equals("")) { + term = term.replace("\"", ""); + } + return term; + } + + public static String[] analyzer(String term) { + //TODO done + term = term.replaceAll("\"", ""); + HttpRequester request = new HttpRequester(); + HttpResponse response = null; + String sourceTerms = ""; +// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj + try { + //String ip = InetAddress.getLocalHost().getHostAddress(); + String ip = AnsjElasticConfigurator.ES_IP; + String port = AnsjElasticConfigurator.ES_PORT; + String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; + String preUrl = "http://" + ip + ":" + port + midUrl; + //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + System.out.println(preUrl); + String enTerm = URLEncoder.encode(term, "UTF-8"); + String url = preUrl + enTerm; + System.out.println(url); + response = request.sendGet(url); + if (response.getCode() == 200) { + if (response != null && response.getContent().length() > 10) { + sourceTerms = response.getContent(); + } + } + + } catch (Exception e) { + e.printStackTrace(); + } + return getTerms(sourceTerms).split(","); + } + + private static String getTerms(String sourceTerms) { + StringBuffer sb = new StringBuffer(); + String[] lines = sourceTerms.split("\n"); + int lineLen = lines.length; + for (int i = 0; i < lineLen; i++) { + String[] terms = lines[i].split("\t"); + String term = terms[0].trim(); + int size = terms.length; + if (i == 0) { + sb.append("\"").append(term).append("\""); + } else { + sb.append(",\"").append(term).append("\""); + } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpRequester.java b/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpRequester.java new file mode 100644 index 00000000..f5e98dc1 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpRequester.java @@ -0,0 +1,299 @@ +package org.elasticsearch.plugin.nlpcn.request; + + +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.Map; +import java.util.Vector; + +/** + * HTTP请求对象 + * + */ +public class HttpRequester { + + + private String defaultContentEncoding; + + private int defaultTimeout = 5000; + + public HttpRequester() { + this.defaultContentEncoding = Charset.defaultCharset().name(); + } + + /** + * 发送GET请求 + * + * @param urlString + * URL地址 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendGet(String urlString) throws IOException { + return this.send(urlString, "GET", null, null); + } + + /** + * 发送GET请求 + * + * @param urlString + * URL地址 + * @param params + * 参数集合 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendGet(String urlString, Map params) + throws IOException { + return this.send(urlString, "GET", params, null); + } + + /** + * 发送GET请求 + * + * @param urlString + * URL地址 + * @param params + * 参数集合 + * @param propertys + * 请求属性 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendGet(String urlString, Map params, + Map propertys) throws IOException { + return this.send(urlString, "GET", params, propertys); + } + + /** + * 发送POST请求 + * + * @param urlString + * URL地址 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendPost(String urlString) throws IOException { + return this.send(urlString, "POST", null, null); + } + + /** + * 发送POST请求 + * + * @param urlString + * URL地址 + * @param params + * 参数集合 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendPost(String urlString, Map params) + throws IOException { + return this.send(urlString, "POST", params, null); + } + + /** + * 发送POST请求 + * + * @param urlString + * URL地址 + * @param params + * 参数集合 + * @param propertys + * 请求属性 + * @return 响应对象 + * @throws java.io.IOException + */ + public HttpResponse sendPost(String urlString, Map params, + Map propertys) throws IOException { + return this.send(urlString, "POST", params, propertys); + } + + /* + * POST + */ + public HttpResponse sendPostRawData(String urlString, Map parameters, + Map propertys, String rawData) throws IOException { + + String method = "POST"; + + HttpURLConnection urlConnection = null; + + if (method.equalsIgnoreCase("POST") && parameters != null) { + StringBuffer param = new StringBuffer(); + int i = 0; + for (String key : parameters.keySet()) { + if (i == 0) + param.append("?"); + else + param.append("&"); + param.append(key).append("=").append(parameters.get(key)); + i++; + } + urlString += param; + } + URL url = new URL(urlString); + urlConnection = (HttpURLConnection) url.openConnection(); + + urlConnection.setRequestMethod(method); + urlConnection.setDoOutput(true); + urlConnection.setDoInput(true); + urlConnection.setUseCaches(false); + urlConnection.setConnectTimeout(defaultTimeout); + urlConnection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)"); + + urlConnection.setRequestProperty("Content-Type", "text/plain;charset=UTF-8"); + + if (propertys != null) + for (String key : propertys.keySet()) { + urlConnection.addRequestProperty(key, propertys.get(key)); + } + + if (method.equalsIgnoreCase("POST") && rawData != null) { + + OutputStream outStream = urlConnection.getOutputStream(); + outStream.write(rawData.getBytes()); + + outStream.flush(); + outStream.close(); + } + + return this.makeContent(urlString, urlConnection); + } + + /** + * 发送HTTP请求 + * + * @param urlString + * @return 响映对象 + * @throws java.io.IOException + */ + private HttpResponse send(String urlString, String method, + Map parameters, Map propertys) + throws IOException { + HttpURLConnection urlConnection = null; + + if (method.equalsIgnoreCase("GET") && parameters != null) { + StringBuffer param = new StringBuffer(); + int i = 0; + for (String key : parameters.keySet()) { + if (i == 0) + param.append("?"); + else + param.append("&"); + param.append(key).append("=").append(parameters.get(key)); + i++; + } + urlString += param; + } + URL url = new URL(urlString); + urlConnection = (HttpURLConnection) url.openConnection(); + + urlConnection.setRequestMethod(method); + urlConnection.setDoOutput(true); + urlConnection.setDoInput(true); + urlConnection.setUseCaches(false); + urlConnection.setConnectTimeout(defaultTimeout); + urlConnection.setReadTimeout(defaultTimeout*2); + urlConnection.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)"); + if (propertys != null) + for (String key : propertys.keySet()) { + urlConnection.addRequestProperty(key, propertys.get(key)); + } + + if (method.equalsIgnoreCase("POST") && parameters != null) { + StringBuffer param = new StringBuffer(); + for (String key : parameters.keySet()) { + param.append("&"); + param.append(key).append("=").append(parameters.get(key)); + } + urlConnection.getOutputStream().write(param.toString().getBytes()); + urlConnection.getOutputStream().flush(); + urlConnection.getOutputStream().close(); + } + + return this.makeContent(urlString, urlConnection); + } + + /** + * 得到响应对象 + * + * @param urlConnection + * @return 响应对象 + * @throws java.io.IOException + */ + private HttpResponse makeContent(String urlString, + HttpURLConnection urlConnection) throws IOException { + HttpResponse httpResponser = new HttpResponse(); + try { + InputStream in = urlConnection.getInputStream(); + BufferedReader bufferedReader = new BufferedReader( + new InputStreamReader(in)); + httpResponser.contentCollection = new Vector(); + StringBuffer temp = new StringBuffer(); + String line = bufferedReader.readLine(); + while (line != null) { + httpResponser.contentCollection.add(line); + temp.append(line).append("\r\n"); + line = bufferedReader.readLine(); + } + bufferedReader.close(); + + String ecod = urlConnection.getContentEncoding(); + if (ecod == null) + ecod = this.defaultContentEncoding; + + httpResponser.urlString = urlString; + + httpResponser.defaultPort = urlConnection.getURL().getDefaultPort(); + httpResponser.file = urlConnection.getURL().getFile(); + httpResponser.host = urlConnection.getURL().getHost(); + httpResponser.path = urlConnection.getURL().getPath(); + httpResponser.port = urlConnection.getURL().getPort(); + httpResponser.protocol = urlConnection.getURL().getProtocol(); + httpResponser.query = urlConnection.getURL().getQuery(); + httpResponser.ref = urlConnection.getURL().getRef(); + httpResponser.userInfo = urlConnection.getURL().getUserInfo(); + + httpResponser.content = new String(temp.toString().getBytes(), ecod); + httpResponser.contentEncoding = ecod; + httpResponser.code = urlConnection.getResponseCode(); + httpResponser.message = urlConnection.getResponseMessage(); + httpResponser.contentType = urlConnection.getContentType(); + httpResponser.method = urlConnection.getRequestMethod(); + httpResponser.connectTimeout = urlConnection.getConnectTimeout(); + httpResponser.readTimeout = urlConnection.getReadTimeout(); + + return httpResponser; + } catch (IOException e) { + throw e; + } finally { + if (urlConnection != null) + urlConnection.disconnect(); + } + } + + /** + * 默认的响应字符集 + */ + public String getDefaultContentEncoding() { + return this.defaultContentEncoding; + } + + /** + * 设置默认的响应字符集 + */ + public void setDefaultContentEncoding(String defaultContentEncoding) { + this.defaultContentEncoding = defaultContentEncoding; + } + + public int getDefaultTimeout() { + return defaultTimeout; + } + + public void setDefaultTimeout(int defaultTimeout) { + this.defaultTimeout = defaultTimeout; + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpResponse.java b/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpResponse.java new file mode 100644 index 00000000..04cbe5e2 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/request/HttpResponse.java @@ -0,0 +1,124 @@ +package org.elasticsearch.plugin.nlpcn.request; +import java.util.Vector; + + +/** + * 响应对象 + */ +public class HttpResponse { + + String urlString; + + int defaultPort; + + String file; + + String host; + + String path; + + int port; + + String protocol; + + String query; + + String ref; + + String userInfo; + + String contentEncoding; + + String content; + + String contentType; + + int code; + + String message; + + String method; + + int connectTimeout; + + int readTimeout; + + Vector contentCollection; + + public String getContent() { + return content; + } + + public String getContentType() { + return contentType; + } + + public int getCode() { + return code; + } + + public String getMessage() { + return message; + } + + public Vector getContentCollection() { + return contentCollection; + } + + public String getContentEncoding() { + return contentEncoding; + } + + public String getMethod() { + return method; + } + + public int getConnectTimeout() { + return connectTimeout; + } + + public int getReadTimeout() { + return readTimeout; + } + + public String getUrlString() { + return urlString; + } + + public int getDefaultPort() { + return defaultPort; + } + + public String getFile() { + return file; + } + + public String getHost() { + return host; + } + + public String getPath() { + return path; + } + + public int getPort() { + return port; + } + + public String getProtocol() { + return protocol; + } + + public String getQuery() { + return query; + } + + public String getRef() { + return ref; + } + + public String getUserInfo() { + return userInfo; + } + +} \ No newline at end of file diff --git a/src/main/java/org/nlpcn/es4sql/domain/Query.java b/src/main/java/org/nlpcn/es4sql/domain/Query.java index 67708205..7a95bb61 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Query.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Query.java @@ -57,4 +57,8 @@ public String[] getTypeArr() { return list.toArray(new String[list.size()]); } +// public boolean replaceWhere(Where where){ +// this.where = where; +// return true; +// } } diff --git a/src/main/java/org/nlpcn/es4sql/domain/Select.java b/src/main/java/org/nlpcn/es4sql/domain/Select.java index caad4688..673952af 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Select.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Select.java @@ -151,5 +151,10 @@ public boolean isOrderdSelect(){ public boolean isSelectAll() { return selectAll; } + +// public boolean replaceOrderBys(List orderBys){ +// this.orderBys = orderBys; +// return true; +// } } diff --git a/src/test/java/org/nlpcn/es4sql/TestSql.java b/src/test/java/org/nlpcn/es4sql/TestSql.java new file mode 100644 index 00000000..d148b0dc --- /dev/null +++ b/src/test/java/org/nlpcn/es4sql/TestSql.java @@ -0,0 +1,123 @@ +package org.nlpcn.es4sql; + +import com.alibaba.druid.sql.SQLUtils; +import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.SQLObject; +import com.alibaba.druid.sql.ast.SQLOrderBy; +import com.alibaba.druid.sql.ast.SQLStatement; +import com.alibaba.druid.sql.ast.expr.SQLQueryExpr; +import com.alibaba.druid.sql.ast.statement.SQLSelectStatement; +import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; +import com.alibaba.druid.sql.dialect.mysql.parser.MySqlExprParser; +import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser; +import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlOutputVisitor; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; +import org.nlpcn.es4sql.parse.ElasticLexer; +import org.nlpcn.es4sql.parse.ElasticSqlExprParser; +import org.junit.Test; +import java.net.InetAddress; +import java.util.List; + +/** + * Created by fangbb on 2016-12-4. + */ +public class Test { + @Test + public void StrTest() { + //String sql = "select * from test where nested(info,info.name = term(seg(\"大数据云计算\")) and info.name=seg(\"python|Hbase|Hive\") or info.name=term(seg(\"Hadoop\"))) and city=seg(\"hah\") and province=\"河北省\" order by nested(info,sum(info.age), info.name=seg(\"java\") and info.name=terms(seg(\"python\")) and info.name=seg(\"Hadoop\")) desc,score desc"; + String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; + //System.out.println(sql); + //sql = SqlSegment.seg(sql); + sql = SqlParseAnalyzer.seg(sql); + //sql = SqlAnalyzer.seg(sql); + System.out.println("-------------"); + System.out.println(sql); + System.out.println("-------------"); + } + + @org.junit.Test + public void Str() throws Exception { + String ip = InetAddress.getLocalHost().getHostAddress(); + System.out.println(ip); + } + + @org.junit.Test + public void SqlExprParser() { + try { + String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; + ElasticSqlExprParser parser1 = new ElasticSqlExprParser(sql); + SQLExpr expr = parser1.expr(); + SQLQueryExpr sqlExpr = (SQLQueryExpr) expr; + MySqlSelectQueryBlock query = (MySqlSelectQueryBlock) sqlExpr.getSubQuery().getQuery(); + SQLOrderBy orderBy = query.getOrderBy(); + SQLExpr sqlExpr1 = query.getWhere(); + System.out.println("===="); + } catch (Exception e) { + e.printStackTrace(); + } + + } + + @org.junit.Test + public void StatementParser() { + String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; + ElasticLexer lexer = new ElasticLexer(sql); + lexer.nextToken(); + MySqlStatementParser parser = new MySqlStatementParser(lexer); + List statementList = parser.parseStatementList(); + StringBuilder out = new StringBuilder(); + MySqlOutputVisitor visitor = new MySqlOutputVisitor(out); + for (SQLStatement statement : statementList) { + statement.accept(visitor); + //visitor.println(); + } + System.out.println(out.toString()); + } + + @org.junit.Test + public void myStatementParser() { + String sql = "select * from a-b where name=seg(\"大数据云计算\") and age > 18 order by age desc"; + ElasticLexer lexer = new ElasticLexer(sql); + lexer.nextToken(); + MySqlStatementParser mySqlStatementParser = new MySqlStatementParser(lexer); + + MySqlExprParser mySqlExprParser = mySqlStatementParser.getExprParser(); + SQLExpr expr = mySqlExprParser.expr(); + SQLQueryExpr sqlExpr = (SQLQueryExpr) expr; + MySqlSelectQueryBlock query = (MySqlSelectQueryBlock) sqlExpr.getSubQuery().getQuery(); + + + List statementList = mySqlStatementParser.parseStatementList(); + StringBuilder out = new StringBuilder(); + MySqlOutputVisitor visitor = new MySqlOutputVisitor(out); + for (SQLStatement statement : statementList) { + statement.accept(visitor); + //visitor.println(); + } + System.out.println(out.toString()); + } + + + @Test + public void pdStr() { + String tmp = "hello w o !"; + char[] ch = tmp.toCharArray(); + int idx = ch.length - 1; + int len = 0; + char term; + for (int i = idx; i >= 0; i--) { + term = ch[i]; + if (Character.isAlphabetic(term)) { + len += 1; + } else if (term == ' ' && len != 0) { + break; + } + System.out.println(term); + } + System.out.println("======"); + System.out.println(len); + System.out.println("======"); + } + +} From eadbae645262ef8363d2b908c3a621797236d8c9 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 9 Dec 2016 12:56:09 +0800 Subject: [PATCH 05/23] update pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2c3f4ddd..fb020fe4 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.nlpcn elasticsearch-sql - 2.3.1.1 + 2.3.1.2 jar Query elasticsearch using SQL elasticsearch-sql From 74b2d49d02979ba43bfa17d5e2f41632b7a59a86 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 9 Dec 2016 17:37:08 +0800 Subject: [PATCH 06/23] fig bug about nesting sort --- .../nlpcn/preAnalyzer/SqlParseAnalyzer.java | 8 +++--- .../org/nlpcn/es4sql/parse/SqlParser.java | 28 +++++++++---------- .../nlpcn/es4sql/query/maker/SortMaker.java | 8 +++++- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index 612cb4d5..accbaa95 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -235,7 +235,7 @@ private static String printSql(MySqlSelectQueryBlock query) { return out.toString(); } - public static String[] analyzer(String term) { + public static String[] analyzer(String term) throws Exception{ //TODO done HttpRequester request = new HttpRequester(); HttpResponse response = null; @@ -248,10 +248,10 @@ public static String[] analyzer(String term) { String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; String preUrl = "http://" + ip + ":" + port + midUrl; //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; - System.out.println(preUrl); + //System.out.println(preUrl); String enTerm = URLEncoder.encode(term, "UTF-8"); String url = preUrl + enTerm; - System.out.println(url); + //System.out.println(url); response = request.sendGet(url); if (response.getCode() == 200) { if (response != null && response.getContent().length() > 10) { @@ -260,7 +260,7 @@ public static String[] analyzer(String term) { } } catch (Exception e) { - e.printStackTrace(); + throw new Exception("There is an error in the word segmentation"); } return getTerms(sourceTerms).split(","); } diff --git a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java index e2bbf403..67cb53f6 100644 --- a/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java +++ b/src/main/java/org/nlpcn/es4sql/parse/SqlParser.java @@ -22,9 +22,9 @@ /** * es sql support - * + * * @author ansj - * + * */ public class SqlParser { @@ -221,19 +221,19 @@ else if(isChildren) NestedType nestedType = new NestedType(); if(nestedType.tryFillFromExpr(between.getTestExpr())){ leftSide = nestedType.field; - + isNested = true; } ChildrenType childrenType = new ChildrenType(); if(childrenType.tryFillFromExpr(between.getTestExpr())){ leftSide = childrenType.field; - + isChildren = true; } Condition condition = null; - + if(isNested) condition = new Condition(CONN.valueOf(opear), leftSide, between.isNot() ? "NOT BETWEEN" : "BETWEEN", new Object[]{parseValue(between.beginExpr), parseValue(between.endExpr)}, nestedType); else if(isChildren) @@ -251,21 +251,21 @@ else if (expr instanceof SQLMethodInvokeExpr) { String methodName = methodExpr.getMethodName(); if(SpatialParamsFactory.isAllowedMethod(methodName)){ String fieldName = methodParameters.get(0).toString(); - + boolean isNested = false; boolean isChildren = false; NestedType nestedType = new NestedType(); if (nestedType.tryFillFromExpr(methodParameters.get(0))) { fieldName = nestedType.field; - + isNested = true; } ChildrenType childrenType = new ChildrenType(); if (childrenType.tryFillFromExpr(methodParameters.get(0))) { fieldName = childrenType.field; - + isChildren = true; } @@ -317,12 +317,12 @@ else if (methodName.toLowerCase().equals("script")){ } } else if (expr instanceof SQLInSubQueryExpr){ SQLInSubQueryExpr sqlIn = (SQLInSubQueryExpr) expr; - + Select innerSelect = parseSelect((MySqlSelectQueryBlock) sqlIn.getSubQuery().getQuery()); - + if(innerSelect.getFields() == null || innerSelect.getFields().size()!=1) throw new SqlParseException("should only have one return field in subQuery"); - + SubQueryExpression subQueryExpression = new SubQueryExpression(innerSelect); String leftSide = sqlIn.getExpr().toString(); @@ -333,14 +333,14 @@ else if (methodName.toLowerCase().equals("script")){ NestedType nestedType = new NestedType(); if(nestedType.tryFillFromExpr(sqlIn.getExpr())){ leftSide = nestedType.field; - + isNested = true; } - + ChildrenType childrenType = new ChildrenType(); if(childrenType.tryFillFromExpr(sqlIn.getExpr())){ leftSide = childrenType.field; - + isChildren = true; } diff --git a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java index 0a2d5728..34594ab7 100644 --- a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java +++ b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java @@ -34,7 +34,13 @@ public static SortBuilder explan(Order order) { List conditions = where.getWheres(); for (Where con : conditions) { key = ((Condition) con).getName(); - val = ((Condition) con).getValue().toString(); + Object vals = ((Condition) con).getValue(); + if (vals.getClass().isArray()) { + Object[] md = (Object[]) vals; + val = md[0].toString(); + } else { + val = vals.toString(); + } queryBuilder = explanSort(queryBuilder,key,val); } From 3eb364401356a086775c676d1c0d438206bc2262 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 9 Dec 2016 19:02:16 +0800 Subject: [PATCH 07/23] modify the nested sort processing policy --- .../nlpcn/es4sql/query/maker/SortMaker.java | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java index 34594ab7..db4a062e 100644 --- a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java +++ b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java @@ -10,7 +10,9 @@ import org.nlpcn.es4sql.domain.Order; import org.nlpcn.es4sql.domain.Where; +import java.util.HashMap; import java.util.List; +import java.util.Map; /** * Created by fangbb on 2016-11-22. @@ -28,22 +30,9 @@ public static SortBuilder explan(Order order) { String path = order.getPath(); String mode = order.getMode() == null ? "sum" : order.getMode(); String type = order.getType(); - String key = ""; - String val = ""; BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); List conditions = where.getWheres(); - for (Where con : conditions) { - key = ((Condition) con).getName(); - Object vals = ((Condition) con).getValue(); - if (vals.getClass().isArray()) { - Object[] md = (Object[]) vals; - val = md[0].toString(); - } else { - val = vals.toString(); - } - queryBuilder = explanSort(queryBuilder,key,val); - } - + setCondition(conditions, queryBuilder); SortBuilder sort = SortBuilders .fieldSort(filedName) .setNestedFilter(queryBuilder) @@ -54,7 +43,29 @@ public static SortBuilder explan(Order order) { return sort; } - public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String key, String value){ + public static void setCondition(List conditions, BoolQueryBuilder queryBuilder) { + String key = ""; + String val = ""; + for (Where con : conditions) { + if (con instanceof Condition) { + key = ((Condition) con).getName(); + Object vals = ((Condition) con).getValue(); + if (vals.getClass().isArray()) { + Object[] md = (Object[]) vals; + val = md[0].toString(); + } else { + val = vals.toString(); + } + queryBuilder = explanSort(queryBuilder, key, val); + } else if (con instanceof Where) { + List conWhere = con.getWheres(); + setCondition(conWhere, queryBuilder); + } + + } + } + + public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String key, String value) { QueryBuilder termQueryBuilder = QueryBuilders.termQuery(key, value); queryBuilder = queryBuilder.should(termQueryBuilder); return queryBuilder; From f4dface362b6075a3d1bc590f289a9b8cc1c94ab Mon Sep 17 00:00:00 2001 From: gitxxx Date: Mon, 12 Dec 2016 10:31:21 +0800 Subject: [PATCH 08/23] add unit test --- .../java/org/nlpcn/es4sql/NestedSortTest.java | 59 +++++++++++++++++++ .../java/org/nlpcn/es4sql/PreAnalyzer.java | 20 +++++++ 2 files changed, 79 insertions(+) create mode 100644 src/test/java/org/nlpcn/es4sql/NestedSortTest.java create mode 100644 src/test/java/org/nlpcn/es4sql/PreAnalyzer.java diff --git a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java new file mode 100644 index 00000000..2cf2ccee --- /dev/null +++ b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java @@ -0,0 +1,59 @@ +package org.nlpcn.es4sql; + +import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.common.transport.InetSocketTransportAddress; +import org.elasticsearch.plugin.deletebyquery.DeleteByQueryPlugin; +import org.junit.Test; +import org.nlpcn.es4sql.query.QueryAction; +import org.nlpcn.es4sql.query.SqlElasticRequestBuilder; + +import java.net.InetAddress; +import java.net.UnknownHostException; + + +/** + * Created by fangbb on 2016-12-12. + */ +public class NestedSortTest { + @Test + public void nestedSortTest(){ + try { + TransportClient client; + client = TransportClient.builder().addPlugin(DeleteByQueryPlugin.class).build().addTransportAddress(getTransportAddress()); + //String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=\"java\" and info.name=\"python\" and info.name=\"Hadoop\") desc,score desc"; + String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"hadoop\") and a.c=term(\"百度和谷歌\") or a.c=term(\"test\") or a.c=term(\"test2\"))"; + //String sql = "SELECT * FROM test where nested(calc_skill_blog,calc_skill_blog.keyword=\"java\")"; + Long now = System.currentTimeMillis(); + SearchDao searchDao = new SearchDao(client); + QueryAction queryAction = searchDao.explain(sql); + SqlElasticRequestBuilder xx = queryAction.explain(); + String jsonExplanation = xx.explain(); + //String jsonExplanation = queryAction.explain().explain(); + System.out.println(System.currentTimeMillis() - now + " ms"); + System.out.println(jsonExplanation); + } catch (Exception e) { + e.printStackTrace(); + } + } + + + public static InetSocketTransportAddress getTransportAddress() throws UnknownHostException { + + String host = "192.168.25.11"; + String port = "9300"; + if (host == null) { + host = "localhost"; + System.out.println("ES_TEST_HOST enviroment variable does not exist. choose default 'localhost'"); + } + + if (port == null) { + port = "9300"; + System.out.println("ES_TEST_PORT enviroment variable does not exist. choose default '9300'"); + } + + System.out.println(String.format("Connection details: host: %s. port:%s.", host, port)); + return new InetSocketTransportAddress(InetAddress.getByName(host), Integer.parseInt(port)); + } + + +} diff --git a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java new file mode 100644 index 00000000..5cf9d1de --- /dev/null +++ b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java @@ -0,0 +1,20 @@ +package org.nlpcn.es4sql; + +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; +import org.junit.Test; + +/** + * Created by fangbb on 2016-12-12. + */ +public class PreAnalyzer { + @Test + public void SqlParseAnalyzerTest(){ + try { + String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\"))) order by nested(a,sum(a.b),a.c=term(seg(\"java hadoop\")) and a.c=term(seg(\"百度和谷歌\")) or a.c=term(\"test\"))"; + String ret = SqlParseAnalyzer.seg(sql); + System.out.println(ret); + } catch (Exception e) { + e.printStackTrace(); + } + } +} From 8078c83cd39bb58b0406d79a4a292a37c04ff84c Mon Sep 17 00:00:00 2001 From: gitxxx Date: Tue, 13 Dec 2016 11:42:00 +0800 Subject: [PATCH 09/23] tmp ver --- .../plugin/nlpcn/preAnalyzer/Analyzer.java | 61 ++++ .../nlpcn/preAnalyzer/SqlParseAnalyzer.java | 267 +++++++++++++----- .../java/org/nlpcn/es4sql/PreAnalyzer.java | 11 +- 3 files changed, 268 insertions(+), 71 deletions(-) create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java new file mode 100644 index 00000000..7a3f9391 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java @@ -0,0 +1,61 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import org.elasticsearch.plugin.nlpcn.request.HttpRequester; +import org.elasticsearch.plugin.nlpcn.request.HttpResponse; + +import java.net.URLEncoder; + +/** + * Created by fangbb on 2016-12-12. + */ +public class Analyzer { + + public static String[] analyzer(String term) throws Exception { + //TODO done + HttpRequester request = new HttpRequester(); + HttpResponse response = null; + String sourceTerms = ""; +// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj + try { + //String ip = InetAddress.getLocalHost().getHostAddress(); + String ip = AnsjElasticConfigurator.ES_IP; + String port = AnsjElasticConfigurator.ES_PORT; + String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; + //String preUrl = "http://" + ip + ":" + port + midUrl; + String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + //System.out.println(preUrl); + String enTerm = URLEncoder.encode(term, "UTF-8"); + String url = preUrl + enTerm; + //System.out.println(url); + response = request.sendGet(url); + if (response.getCode() == 200) { + if (response != null && response.getContent().length() > 10) { + sourceTerms = response.getContent(); + } + } + + } catch (Exception e) { + throw new Exception("There is an error in the word segmentation"); + } + return getTerms(sourceTerms).split(","); + } + + private static String getTerms(String sourceTerms) { + StringBuffer sb = new StringBuffer(); + String[] lines = sourceTerms.split("\n"); + int lineLen = lines.length; + for (int i = 0; i < lineLen; i++) { + String[] terms = lines[i].split("\t"); + String term = terms[0].trim(); + int size = terms.length; + if (i == 0) { + //sb.append("\"").append(term).append("\""); + sb.append(term); + } else { + sb.append(",").append(term); + } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index accbaa95..29f661cb 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -1,6 +1,7 @@ package org.elasticsearch.plugin.nlpcn.preAnalyzer; import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.SQLObject; import com.alibaba.druid.sql.ast.SQLOrderBy; import com.alibaba.druid.sql.ast.expr.*; import com.alibaba.druid.sql.ast.statement.SQLSelect; @@ -9,12 +10,9 @@ import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlOutputVisitor; import com.alibaba.druid.util.JdbcConstants; -import org.elasticsearch.plugin.nlpcn.request.HttpRequester; -import org.elasticsearch.plugin.nlpcn.request.HttpResponse; import org.nlpcn.es4sql.parse.ElasticLexer; import org.nlpcn.es4sql.parse.ElasticSqlExprParser; -import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; @@ -82,33 +80,110 @@ private static void preTraverse(SQLExpr sqlExpr) throws Exception { SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; SQLExpr left = sqlBinaryOpExpr.getLeft(); SQLExpr right = sqlBinaryOpExpr.getRight(); - if (isLeaf(left) && isLeaf(right)) { - //TODO + SQLBinaryOperator sqlBinaryOperator = sqlBinaryOpExpr.getOperator(); + if (isLeaf(sqlBinaryOperator)) { + // 普通叶节点替换 replaceLeafNode(sqlBinaryOpExpr); - return; } else { preTraverse(left); preTraverse(right); } - } else if (sqlExpr instanceof SQLMethodInvokeExpr) { - String nested = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); - List params = ((SQLMethodInvokeExpr) sqlExpr).getParameters(); - SQLBinaryOpExpr condition; - if (nested.equals("nested") && params.size() == 2) { - parseWhere((SQLBinaryOpExpr) params.get(1)); + } else if (isNested(sqlExpr)) { + //nested 嵌套页节点 + replaceNestedLeafNode(sqlExpr); + } + } + + //TODO 对Nested叶节点拆分 + //TODO 分词 + //TODO 构造新节点 + private static void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { + SQLObject sqlObject = sqlExpr.getParent(); + SQLExpr newExpr = parseNested(sqlExpr); + if (sqlObject instanceof MySqlSelectQueryBlock) { + ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); + } else if (sqlObject instanceof SQLBinaryOpExpr) { + if (sqlExpr.equals(((SQLBinaryOpExpr) sqlObject).getRight())) { + //TODO + ((SQLBinaryOpExpr) sqlObject).setRight(newExpr); + System.out.println("============right"); } else { - new Exception("Nested where must be 2 parameters"); + //TODO + ((SQLBinaryOpExpr) sqlObject).setLeft(newExpr); + System.out.println("============left"); } } } - private static boolean isLeaf(SQLExpr sqlExpr) { + //TODO 解析nested的叶节点,返回新构造的叶节点 + private static SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { + String methodName = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); + SQLExpr pathName = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(0); + SQLExpr where = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(1); + SQLExpr retExpr = null; + if (where != null) { + preTraverseNested(methodName, pathName, where); + if (isLeaf(where)) { + SQLObject parent = where.getParent(); + if (parent instanceof SQLMethodInvokeExpr) { + SQLMethodInvokeExpr tmp = ((SQLMethodInvokeExpr) parent); + if (tmp.getParameters().size() == 3) { + retExpr = tmp.getParameters().get(2); + } else { + retExpr = tmp; + } + } + }else if (where instanceof SQLBinaryOpExpr) { + retExpr = where; + } + + } + return retExpr; + } + + private static void preTraverseNested(String methodName, SQLExpr pathName, SQLExpr sqlExpr) throws Exception { if (sqlExpr instanceof SQLBinaryOpExpr) { + SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; + SQLExpr left = sqlBinaryOpExpr.getLeft(); + SQLExpr right = sqlBinaryOpExpr.getRight(); + SQLBinaryOperator sqlBinaryOperator = sqlBinaryOpExpr.getOperator(); + //left和right都是a=b这种形式 + if (isLeaf(sqlBinaryOperator)) { + generateNestedLeafNode(methodName, pathName, sqlBinaryOpExpr); + } else { + preTraverseNested(methodName, pathName, left); + preTraverseNested(methodName, pathName, right); + } + } + } + + private static boolean isNested(SQLExpr sqlExpr) { + if (sqlExpr instanceof SQLMethodInvokeExpr) { + String mName = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); + if (mName.equals("nested")) { + return true; + } + } + return false; + } + + private static boolean isLeaf(SQLBinaryOperator sqlBinaryOperator) { + if (sqlBinaryOperator.equals(SQLBinaryOperator.BooleanOr) || sqlBinaryOperator.equals(SQLBinaryOperator.BooleanAnd)) { return false; } return true; } + private static boolean isLeaf(SQLExpr sqlExpr) { + if (sqlExpr instanceof SQLBinaryOpExpr) { + SQLBinaryOperator sqlBinaryOperator = ((SQLBinaryOpExpr) sqlExpr).getOperator(); + if (sqlBinaryOperator.equals(SQLBinaryOperator.BooleanOr) || sqlBinaryOperator.equals(SQLBinaryOperator.BooleanAnd)) { + return false; + } + } + return true; + } + private static Method parseMethod(SQLExpr right) throws Exception { Method retMethod = new Method(); if (right instanceof SQLMethodInvokeExpr) { @@ -148,7 +223,7 @@ private static boolean segNoQuota(SQLMethodInvokeExpr methodInvokeExpr) { return false; } - private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr,SQLExpr right,Method method){ + private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr, SQLExpr right, Method method) { //当seg内没有引号时,去掉seg() if (segNoQuota((SQLMethodInvokeExpr) right) && method.containSeg()) { method.setParentMethod(null); @@ -157,6 +232,96 @@ private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr,SQLExpr right,Met } } + private static void generateNestedLeafNode(String methodName, SQLExpr pathName, SQLBinaryOpExpr binaryOpExpr) throws Exception { + //right:a.b = "d" + SQLExpr left = binaryOpExpr.getLeft(); + SQLExpr right = binaryOpExpr.getRight(); + SQLBinaryOperator operator = binaryOpExpr.getOperator(); + String filed = ((SQLIdentifierExpr) left).getName(); + if (right instanceof SQLMethodInvokeExpr) { + Method method = parseMethod(right); + String sourceTerm = method.getParams(); + removeSegFun(binaryOpExpr, right, method); + //seg(term("abc")) exception + if (method.containSeg() && sourceTerm != null) { + String[] terms = Analyzer.analyzer(sourceTerm); + //String[] terms = "a,b".split(","); + String funName = method.getFunName(); + List allNewNode = new ArrayList(); + for (String term : terms) { + if (!funName.equals("")) { + SQLMethodInvokeExpr methodInvokeExpr = new SQLMethodInvokeExpr(); + methodInvokeExpr.setMethodName(funName); + methodInvokeExpr.addParameter(new SQLCharExpr(term)); + SQLBinaryOpExpr opNode = createOpNode(filed, methodInvokeExpr, operator); + SQLMethodInvokeExpr nestedNode = createNestedNode(methodName, pathName, opNode); + allNewNode.add(nestedNode); + } else { + SQLCharExpr charExpr = new SQLCharExpr(); + charExpr.setText(term); + SQLBinaryOpExpr opNode = createOpNode(filed, charExpr, operator); + SQLMethodInvokeExpr nestedNode = createNestedNode(methodName, pathName, opNode); + allNewNode.add(nestedNode); + } + } + conNestedTree(binaryOpExpr, allNewNode); + } + } else { + List allNewNode = new ArrayList(); + SQLBinaryOpExpr opNode = createOpNode(filed, right, operator); + SQLMethodInvokeExpr nestedNode = createNestedNode(methodName, pathName, opNode); + allNewNode.add(nestedNode); + conNestedTree(binaryOpExpr, allNewNode); + } + } + + private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Exception { + SQLObject sqlObject = sqlExpr.getParent(); + if (sqlObject instanceof MySqlSelectQueryBlock) { + ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); + } else if (sqlObject instanceof SQLBinaryOpExpr) { + if (sqlExpr.equals(((SQLBinaryOpExpr) sqlObject).getRight())) { + //TODO + ((SQLBinaryOpExpr) sqlObject).setRight(newExpr); + System.out.println("============right"); + } else { + //TODO + ((SQLBinaryOpExpr) sqlObject).setLeft(newExpr); + System.out.println("============left"); + } + } else if (sqlObject instanceof SQLMethodInvokeExpr) { + ((SQLMethodInvokeExpr) sqlObject).addParameter(newExpr); + } + } + + //构造新的二叉树替换原有节点 + private static void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExprs) throws Exception{ + int size = sqlExprs.size(); + int andNum = size - 1; + List allNode = new ArrayList(); + if (andNum == 0) { + replaceOldNode(retExpr,sqlExprs.get(0)); + } else { + for (int i = 0; i < andNum; i++) { + if (i == 0) { + retExpr.setOperator(SQLBinaryOperator.BooleanAnd); + //retExpr做为顶点 + allNode.add(retExpr); + } else { + allNode.add(createOpNode(null, null, SQLBinaryOperator.BooleanAnd)); + } + } + allNode.addAll(sqlExprs); + //共有n-1个And,n个node,每一个节点从0开始进行编号,那么第i个节点的左孩子的编号为2*i+1,右孩子为2*i+2。 + for (int parentIndex = 0; parentIndex < andNum; parentIndex++) { + if (allNode.get(parentIndex) instanceof SQLBinaryOpExpr) { + ((SQLBinaryOpExpr) allNode.get(parentIndex)).setLeft(allNode.get(parentIndex * 2 + 1)); + ((SQLBinaryOpExpr) allNode.get(parentIndex)).setRight(allNode.get(parentIndex * 2 + 2)); + } + } + } + } + //对叶节点分词,构造新节点 private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Exception { SQLExpr left = binaryOpExpr.getLeft(); @@ -166,10 +331,10 @@ private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Excepti if (right instanceof SQLMethodInvokeExpr) { Method method = parseMethod(right); String sourceTerm = method.getParams(); - removeSegFun(binaryOpExpr,right,method); + removeSegFun(binaryOpExpr, right, method); //seg(term("abc")) exception if (method.containSeg() && sourceTerm != null) { - String[] terms = analyzer(sourceTerm); + String[] terms = Analyzer.analyzer(sourceTerm); //String[] terms = "a,b".split(","); String funName = method.getFunName(); List allNewNode = new ArrayList(); @@ -178,11 +343,11 @@ private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Excepti SQLMethodInvokeExpr methodInvokeExpr = new SQLMethodInvokeExpr(); methodInvokeExpr.setMethodName(funName); methodInvokeExpr.addParameter(new SQLCharExpr(term)); - allNewNode.add(createNode(filed, methodInvokeExpr, operator)); + allNewNode.add(createOpNode(filed, methodInvokeExpr, operator)); } else { SQLCharExpr charExpr = new SQLCharExpr(); charExpr.setText(term); - allNewNode.add(createNode(filed, charExpr, operator)); + allNewNode.add(createOpNode(filed, charExpr, operator)); } } @@ -198,13 +363,14 @@ private static void conTree(SQLBinaryOpExpr retExpr, List SQLBi List allNode = new ArrayList(); if (andNum == 0) { retExpr.setRight(SQLBinaryOpNode.get(0).getRight()); - }else { + } else { for (int i = 0; i < andNum; i++) { if (i == 0) { retExpr.setOperator(SQLBinaryOperator.BooleanAnd); + //retExpr做为顶点 allNode.add(retExpr); } else { - allNode.add(createNode(null, null, SQLBinaryOperator.BooleanAnd)); + allNode.add(createOpNode(null, null, SQLBinaryOperator.BooleanAnd)); } } allNode.addAll(SQLBinaryOpNode); @@ -218,7 +384,7 @@ private static void conTree(SQLBinaryOpExpr retExpr, List SQLBi //TODO 构造一个节点 - private static SQLBinaryOpExpr createNode(String filed, SQLExpr value, SQLBinaryOperator operator) { + private static SQLBinaryOpExpr createOpNode(String filed, SQLExpr value, SQLBinaryOperator operator) { SQLBinaryOpExpr retWhere = new SQLBinaryOpExpr(); SQLIdentifierExpr ileft = new SQLIdentifierExpr(); ileft.setName(filed); @@ -228,6 +394,15 @@ private static SQLBinaryOpExpr createNode(String filed, SQLExpr value, SQLBinary return retWhere; } + //TODO 构造一个nested节点 + private static SQLMethodInvokeExpr createNestedNode(String name, SQLExpr pathName, SQLBinaryOpExpr sqlBinaryOpExpr) { + SQLMethodInvokeExpr sqlMethodInvokeExpr = new SQLMethodInvokeExpr(); + sqlMethodInvokeExpr.setMethodName(name); + sqlMethodInvokeExpr.addParameter(pathName); + sqlMethodInvokeExpr.addParameter(sqlBinaryOpExpr); + return sqlMethodInvokeExpr; + } + private static String printSql(MySqlSelectQueryBlock query) { StringBuilder out = new StringBuilder(); MySqlOutputVisitor visitor = new MySqlOutputVisitor(out); @@ -235,52 +410,4 @@ private static String printSql(MySqlSelectQueryBlock query) { return out.toString(); } - public static String[] analyzer(String term) throws Exception{ - //TODO done - HttpRequester request = new HttpRequester(); - HttpResponse response = null; - String sourceTerms = ""; -// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj - try { - //String ip = InetAddress.getLocalHost().getHostAddress(); - String ip = AnsjElasticConfigurator.ES_IP; - String port = AnsjElasticConfigurator.ES_PORT; - String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; - String preUrl = "http://" + ip + ":" + port + midUrl; - //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; - //System.out.println(preUrl); - String enTerm = URLEncoder.encode(term, "UTF-8"); - String url = preUrl + enTerm; - //System.out.println(url); - response = request.sendGet(url); - if (response.getCode() == 200) { - if (response != null && response.getContent().length() > 10) { - sourceTerms = response.getContent(); - } - } - - } catch (Exception e) { - throw new Exception("There is an error in the word segmentation"); - } - return getTerms(sourceTerms).split(","); - } - - private static String getTerms(String sourceTerms) { - StringBuffer sb = new StringBuffer(); - String[] lines = sourceTerms.split("\n"); - int lineLen = lines.length; - for (int i = 0; i < lineLen; i++) { - String[] terms = lines[i].split("\t"); - String term = terms[0].trim(); - int size = terms.length; - if (i == 0) { - //sb.append("\"").append(term).append("\""); - sb.append(term); - } else { - sb.append(",").append(term); - } - } - return sb.toString(); - } - } diff --git a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java index 5cf9d1de..302d0603 100644 --- a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java +++ b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java @@ -10,7 +10,16 @@ public class PreAnalyzer { @Test public void SqlParseAnalyzerTest(){ try { - String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\"))) order by nested(a,sum(a.b),a.c=term(seg(\"java hadoop\")) and a.c=term(seg(\"百度和谷歌\")) or a.c=term(\"test\"))"; + String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\")) and a.c=\"中国\") or nested(a,a.c=term(seg(\"java hadoop\")))"; + //String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\")) and a.c=seg(\"中国\")) and a=\"hadop\""; + //String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度\"))) "; + //String sql = "SELECT * FROM test where nested(a,a.c=seg(\"百度\"))"; + //String sql = "SELECT * FROM test where nested(a,a.c=term(\"中国\"))"; + //String sql = "SELECT * FROM test where a is not null and b=\"xxx\" and nested(a,a.c=term(seg(\"百度\")))"; + //String sql = "SELECT * FROM test where a is not null and b=\"xxx\" and nested(a,a.c=term(seg(\"百度和谷歌\")) and a.c=term(seg(\"hadoop java\")) and a.c=term(seg(\"test\")))"; + //String sql = "SELECT * FROM test where a is not null and b=\"xxx\" and nested(a,a.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"百度和谷歌\")))"; + //String sql = "SELECT * FROM test where nested(a,a.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"百度和谷歌\")))"; + //String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\"))) "; String ret = SqlParseAnalyzer.seg(sql); System.out.println(ret); } catch (Exception e) { From d7f9d6bf28be5d54cdc55c8717d266a0322e459b Mon Sep 17 00:00:00 2001 From: gitxxx Date: Tue, 13 Dec 2016 11:45:26 +0800 Subject: [PATCH 10/23] remove print --- .../plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index 29f661cb..de483f73 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -281,13 +281,9 @@ private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Excep ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); } else if (sqlObject instanceof SQLBinaryOpExpr) { if (sqlExpr.equals(((SQLBinaryOpExpr) sqlObject).getRight())) { - //TODO ((SQLBinaryOpExpr) sqlObject).setRight(newExpr); - System.out.println("============right"); } else { - //TODO ((SQLBinaryOpExpr) sqlObject).setLeft(newExpr); - System.out.println("============left"); } } else if (sqlObject instanceof SQLMethodInvokeExpr) { ((SQLMethodInvokeExpr) sqlObject).addParameter(newExpr); From 0e0ff23d10474c56b577a2bd0d70273f21e21c7b Mon Sep 17 00:00:00 2001 From: gitxxx Date: Tue, 13 Dec 2016 18:37:14 +0800 Subject: [PATCH 11/23] fix bug --- .../plugin/nlpcn/RestSqlAction.java | 2 -- .../nlpcn/preAnalyzer/SqlParseAnalyzer.java | 27 ++++++++++--------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java index 1d00a793..7f80ed77 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java @@ -5,9 +5,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugin.nlpcn.executors.ActionRequestRestExecuterFactory; import org.elasticsearch.plugin.nlpcn.executors.RestExecutor; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlSegment; import org.elasticsearch.rest.*; import org.nlpcn.es4sql.SearchDao; import org.nlpcn.es4sql.query.QueryAction; diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index de483f73..5c06bab7 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -13,6 +13,7 @@ import org.nlpcn.es4sql.parse.ElasticLexer; import org.nlpcn.es4sql.parse.ElasticSqlExprParser; +import java.sql.SQLFeatureNotSupportedException; import java.util.ArrayList; import java.util.List; @@ -63,10 +64,8 @@ private static void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { if (sqlExpr instanceof SQLMethodInvokeExpr) { String nested = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); List params = ((SQLMethodInvokeExpr) sqlExpr).getParameters(); - SQLBinaryOpExpr condition; if (nested.equals("nested") && params.size() == 3) { - parseWhere((SQLBinaryOpExpr) params.get(2)); - //params.add(2, condition); + parseWhere(params.get(2)); } else { new Exception("Nested sorting must be 3 parameters"); } @@ -89,8 +88,10 @@ private static void preTraverse(SQLExpr sqlExpr) throws Exception { preTraverse(right); } } else if (isNested(sqlExpr)) { - //nested 嵌套页节点 + //nested 嵌套叶节点 replaceNestedLeafNode(sqlExpr); + } else { + throw new SQLFeatureNotSupportedException(); } } @@ -104,13 +105,9 @@ private static void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); } else if (sqlObject instanceof SQLBinaryOpExpr) { if (sqlExpr.equals(((SQLBinaryOpExpr) sqlObject).getRight())) { - //TODO ((SQLBinaryOpExpr) sqlObject).setRight(newExpr); - System.out.println("============right"); } else { - //TODO ((SQLBinaryOpExpr) sqlObject).setLeft(newExpr); - System.out.println("============left"); } } } @@ -133,7 +130,7 @@ private static SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { retExpr = tmp; } } - }else if (where instanceof SQLBinaryOpExpr) { + } else if (where instanceof SQLBinaryOpExpr) { retExpr = where; } @@ -265,6 +262,12 @@ private static void generateNestedLeafNode(String methodName, SQLExpr pathName, } } conNestedTree(binaryOpExpr, allNewNode); + } else { + List allNewNode = new ArrayList(); + SQLBinaryOpExpr opNode = createOpNode(filed, right, operator); + SQLMethodInvokeExpr nestedNode = createNestedNode(methodName, pathName, opNode); + allNewNode.add(nestedNode); + conNestedTree(binaryOpExpr, allNewNode); } } else { List allNewNode = new ArrayList(); @@ -275,7 +278,7 @@ private static void generateNestedLeafNode(String methodName, SQLExpr pathName, } } - private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Exception { + private static void replaceOldNode(SQLExpr sqlExpr, SQLExpr newExpr) throws Exception { SQLObject sqlObject = sqlExpr.getParent(); if (sqlObject instanceof MySqlSelectQueryBlock) { ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); @@ -291,12 +294,12 @@ private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Excep } //构造新的二叉树替换原有节点 - private static void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExprs) throws Exception{ + private static void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExprs) throws Exception { int size = sqlExprs.size(); int andNum = size - 1; List allNode = new ArrayList(); if (andNum == 0) { - replaceOldNode(retExpr,sqlExprs.get(0)); + replaceOldNode(retExpr, sqlExprs.get(0)); } else { for (int i = 0; i < andNum; i++) { if (i == 0) { From 7551b5c4d8d99242462f172e2fe4d1ca9fd0abab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E9=9B=AA=E6=B8=85?= Date: Tue, 13 Dec 2016 19:16:06 +0800 Subject: [PATCH 12/23] =?UTF-8?q?=E5=AE=9E=E7=8E=B0SqlParseAnalyzer?= =?UTF-8?q?=E7=9A=84=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E5=9B=A0=E6=AD=A4=E9=87=8D=E6=9E=84=E7=9B=B8=E5=85=B3=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugin/nlpcn/preAnalyzer/Analyzer.java | 61 ------------------ .../nlpcn/preAnalyzer/AnsjAnalyzer.java | 63 ++++++++++++++++--- .../nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java | 16 +++++ .../es4sql/{TestSql.java => SegTest.java} | 0 4 files changed, 70 insertions(+), 70 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java rename src/test/java/org/nlpcn/es4sql/{TestSql.java => SegTest.java} (100%) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java deleted file mode 100644 index 7a3f9391..00000000 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.elasticsearch.plugin.nlpcn.preAnalyzer; - -import org.elasticsearch.plugin.nlpcn.request.HttpRequester; -import org.elasticsearch.plugin.nlpcn.request.HttpResponse; - -import java.net.URLEncoder; - -/** - * Created by fangbb on 2016-12-12. - */ -public class Analyzer { - - public static String[] analyzer(String term) throws Exception { - //TODO done - HttpRequester request = new HttpRequester(); - HttpResponse response = null; - String sourceTerms = ""; -// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj - try { - //String ip = InetAddress.getLocalHost().getHostAddress(); - String ip = AnsjElasticConfigurator.ES_IP; - String port = AnsjElasticConfigurator.ES_PORT; - String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; - //String preUrl = "http://" + ip + ":" + port + midUrl; - String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; - //System.out.println(preUrl); - String enTerm = URLEncoder.encode(term, "UTF-8"); - String url = preUrl + enTerm; - //System.out.println(url); - response = request.sendGet(url); - if (response.getCode() == 200) { - if (response != null && response.getContent().length() > 10) { - sourceTerms = response.getContent(); - } - } - - } catch (Exception e) { - throw new Exception("There is an error in the word segmentation"); - } - return getTerms(sourceTerms).split(","); - } - - private static String getTerms(String sourceTerms) { - StringBuffer sb = new StringBuffer(); - String[] lines = sourceTerms.split("\n"); - int lineLen = lines.length; - for (int i = 0; i < lineLen; i++) { - String[] terms = lines[i].split("\t"); - String term = terms[0].trim(); - int size = terms.length; - if (i == 0) { - //sb.append("\"").append(term).append("\""); - sb.append(term); - } else { - sb.append(",").append(term); - } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java index a43e84c8..7a3f9391 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java @@ -1,16 +1,61 @@ package org.elasticsearch.plugin.nlpcn.preAnalyzer; -import org.elasticsearch.common.component.AbstractComponent; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugin.nlpcn.request.HttpRequester; +import org.elasticsearch.plugin.nlpcn.request.HttpResponse; + +import java.net.URLEncoder; /** - * Created by fangbb on 2016-12-6. + * Created by fangbb on 2016-12-12. */ -public class AnsjAnalyzer extends AbstractComponent { - @Inject - public AnsjAnalyzer(final Settings settings){ - super(settings); - AnsjElasticConfigurator.init(settings); +public class Analyzer { + + public static String[] analyzer(String term) throws Exception { + //TODO done + HttpRequester request = new HttpRequester(); + HttpResponse response = null; + String sourceTerms = ""; +// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj + try { + //String ip = InetAddress.getLocalHost().getHostAddress(); + String ip = AnsjElasticConfigurator.ES_IP; + String port = AnsjElasticConfigurator.ES_PORT; + String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; + //String preUrl = "http://" + ip + ":" + port + midUrl; + String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + //System.out.println(preUrl); + String enTerm = URLEncoder.encode(term, "UTF-8"); + String url = preUrl + enTerm; + //System.out.println(url); + response = request.sendGet(url); + if (response.getCode() == 200) { + if (response != null && response.getContent().length() > 10) { + sourceTerms = response.getContent(); + } + } + + } catch (Exception e) { + throw new Exception("There is an error in the word segmentation"); + } + return getTerms(sourceTerms).split(","); } + + private static String getTerms(String sourceTerms) { + StringBuffer sb = new StringBuffer(); + String[] lines = sourceTerms.split("\n"); + int lineLen = lines.length; + for (int i = 0; i < lineLen; i++) { + String[] terms = lines[i].split("\t"); + String term = terms[0].trim(); + int size = terms.length; + if (i == 0) { + //sb.append("\"").append(term).append("\""); + sb.append(term); + } else { + sb.append(",").append(term); + } + } + return sb.toString(); + } + } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java new file mode 100644 index 00000000..a43e84c8 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java @@ -0,0 +1,16 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; + +/** + * Created by fangbb on 2016-12-6. + */ +public class AnsjAnalyzer extends AbstractComponent { + @Inject + public AnsjAnalyzer(final Settings settings){ + super(settings); + AnsjElasticConfigurator.init(settings); + } +} diff --git a/src/test/java/org/nlpcn/es4sql/TestSql.java b/src/test/java/org/nlpcn/es4sql/SegTest.java similarity index 100% rename from src/test/java/org/nlpcn/es4sql/TestSql.java rename to src/test/java/org/nlpcn/es4sql/SegTest.java From 27cff2082f0bd351eff10bfd797f47a83bd7f42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9E=97=E9=9B=AA=E6=B8=85?= Date: Tue, 13 Dec 2016 19:18:20 +0800 Subject: [PATCH 13/23] =?UTF-8?q?=E6=B7=BB=E5=8A=A0SqlParseAnalyzer?= =?UTF-8?q?=E7=9A=84=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95=EF=BC=8C=E5=B9=B6?= =?UTF-8?q?=E5=9B=A0=E6=AD=A4=E9=87=8D=E6=9E=84=E7=9B=B8=E5=85=B3=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugin/nlpcn/RestSqlAction.java | 6 +- .../elasticsearch/plugin/nlpcn/SqlPlug.java | 4 +- .../plugin/nlpcn/preAnalyzer/Analyzer.java | 8 ++ .../nlpcn/preAnalyzer/AnsjAnalyzer.java | 4 +- .../nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java | 4 +- .../nlpcn/preAnalyzer/SqlParseAnalyzer.java | 55 ++++++------ .../java/org/nlpcn/es4sql/domain/Field.java | 1 + .../java/org/nlpcn/es4sql/domain/Order.java | 1 + .../java/org/nlpcn/es4sql/domain/Select.java | 1 + .../plugin/nlpcn/SqlParseAnalyzerTest.java | 84 +++++++++++++++++++ .../java/org/nlpcn/es4sql/PreAnalyzer.java | 6 +- src/test/java/org/nlpcn/es4sql/SegTest.java | 19 +++-- 12 files changed, 153 insertions(+), 40 deletions(-) create mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java create mode 100644 src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java index 1d00a793..25752ab9 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java @@ -5,6 +5,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugin.nlpcn.executors.ActionRequestRestExecuterFactory; import org.elasticsearch.plugin.nlpcn.executors.RestExecutor; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlSegment; @@ -36,7 +37,8 @@ protected void handleRequest(RestRequest request, RestChannel channel, final Cli } //ananlyze //sql = SqlAnalyzer.seg(sql); - sql = SqlParseAnalyzer.seg(sql); + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + sql = sqlParseAnalyzer.seg(sql); SearchDao searchDao = new SearchDao(client); QueryAction queryAction= searchDao.explain(sql); @@ -54,4 +56,4 @@ protected void handleRequest(RestRequest request, RestChannel channel, final Cli restExecutor.execute(client,params,queryAction,channel); } } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java index fbc9f41d..7e2890a6 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java @@ -3,7 +3,7 @@ import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.Module; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestModule; @@ -31,7 +31,7 @@ public Collection nodeModules() { public static class AnsjModule extends AbstractModule { @Override protected void configure() { - bind(AnsjAnalyzer.class).asEagerSingleton(); + bind(AnsjAnalyzerPlugin.class).asEagerSingleton(); } } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java new file mode 100644 index 00000000..0597f416 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java @@ -0,0 +1,8 @@ +package org.elasticsearch.plugin.nlpcn.preAnalyzer; + +/** + * Created by linxueqing on 2016/12/13. + */ +public class Analyzer { + public String[] analyzer(String term) throws Exception {return new String[]{term};} +} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java index 7a3f9391..55eb02c4 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java @@ -8,9 +8,9 @@ /** * Created by fangbb on 2016-12-12. */ -public class Analyzer { +public class AnsjAnalyzer extends Analyzer { - public static String[] analyzer(String term) throws Exception { + public String[] analyzer(String term) throws Exception { //TODO done HttpRequester request = new HttpRequester(); HttpResponse response = null; diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java index a43e84c8..d7d36262 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java @@ -7,9 +7,9 @@ /** * Created by fangbb on 2016-12-6. */ -public class AnsjAnalyzer extends AbstractComponent { +public class AnsjAnalyzerPlugin extends AbstractComponent { @Inject - public AnsjAnalyzer(final Settings settings){ + public AnsjAnalyzerPlugin(final Settings settings){ super(settings); AnsjElasticConfigurator.init(settings); } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index 29f661cb..78833248 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -21,9 +21,14 @@ * Created by fangbb on 2016-12-6. */ public class SqlParseAnalyzer { - public static String dbType = JdbcConstants.MYSQL; +// public String dbType = JdbcConstants.MYSQL; + private Analyzer analyzer; - public static String seg(String sql) throws Exception { + public SqlParseAnalyzer(Analyzer analyzer) { + this.analyzer = analyzer; + } + + public String seg(String sql) throws Exception { if (sql.contains("seg(")) { MySqlSelectQueryBlock query = getQueryBlock(sql); parseWhere(query.getWhere()); @@ -33,7 +38,7 @@ public static String seg(String sql) throws Exception { return sql; } - private static MySqlSelectQueryBlock getQueryBlock(String sql) { + private MySqlSelectQueryBlock getQueryBlock(String sql) { ElasticLexer lexer = new ElasticLexer(sql); lexer.nextToken(); ElasticSqlExprParser elasticSqlExprParser = new ElasticSqlExprParser(lexer); @@ -46,14 +51,14 @@ private static MySqlSelectQueryBlock getQueryBlock(String sql) { return query; } - private static void parseWhere(SQLExpr where) throws Exception { + private void parseWhere(SQLExpr where) throws Exception { if (where == null) { return; } preTraverse(where); } - private static void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { + private void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { if (sqlOrderBy == null) { return; } @@ -75,7 +80,7 @@ private static void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { } //先序遍历获取叶子节点 - private static void preTraverse(SQLExpr sqlExpr) throws Exception { + private void preTraverse(SQLExpr sqlExpr) throws Exception { if (sqlExpr instanceof SQLBinaryOpExpr) { SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; SQLExpr left = sqlBinaryOpExpr.getLeft(); @@ -97,7 +102,7 @@ private static void preTraverse(SQLExpr sqlExpr) throws Exception { //TODO 对Nested叶节点拆分 //TODO 分词 //TODO 构造新节点 - private static void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { + private void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { SQLObject sqlObject = sqlExpr.getParent(); SQLExpr newExpr = parseNested(sqlExpr); if (sqlObject instanceof MySqlSelectQueryBlock) { @@ -116,7 +121,7 @@ private static void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { } //TODO 解析nested的叶节点,返回新构造的叶节点 - private static SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { + private SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { String methodName = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); SQLExpr pathName = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(0); SQLExpr where = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(1); @@ -141,7 +146,7 @@ private static SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { return retExpr; } - private static void preTraverseNested(String methodName, SQLExpr pathName, SQLExpr sqlExpr) throws Exception { + private void preTraverseNested(String methodName, SQLExpr pathName, SQLExpr sqlExpr) throws Exception { if (sqlExpr instanceof SQLBinaryOpExpr) { SQLBinaryOpExpr sqlBinaryOpExpr = (SQLBinaryOpExpr) sqlExpr; SQLExpr left = sqlBinaryOpExpr.getLeft(); @@ -157,7 +162,7 @@ private static void preTraverseNested(String methodName, SQLExpr pathName, SQLEx } } - private static boolean isNested(SQLExpr sqlExpr) { + private boolean isNested(SQLExpr sqlExpr) { if (sqlExpr instanceof SQLMethodInvokeExpr) { String mName = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); if (mName.equals("nested")) { @@ -167,14 +172,14 @@ private static boolean isNested(SQLExpr sqlExpr) { return false; } - private static boolean isLeaf(SQLBinaryOperator sqlBinaryOperator) { + private boolean isLeaf(SQLBinaryOperator sqlBinaryOperator) { if (sqlBinaryOperator.equals(SQLBinaryOperator.BooleanOr) || sqlBinaryOperator.equals(SQLBinaryOperator.BooleanAnd)) { return false; } return true; } - private static boolean isLeaf(SQLExpr sqlExpr) { + private boolean isLeaf(SQLExpr sqlExpr) { if (sqlExpr instanceof SQLBinaryOpExpr) { SQLBinaryOperator sqlBinaryOperator = ((SQLBinaryOpExpr) sqlExpr).getOperator(); if (sqlBinaryOperator.equals(SQLBinaryOperator.BooleanOr) || sqlBinaryOperator.equals(SQLBinaryOperator.BooleanAnd)) { @@ -184,7 +189,7 @@ private static boolean isLeaf(SQLExpr sqlExpr) { return true; } - private static Method parseMethod(SQLExpr right) throws Exception { + private Method parseMethod(SQLExpr right) throws Exception { Method retMethod = new Method(); if (right instanceof SQLMethodInvokeExpr) { SQLMethodInvokeExpr methodInvokeExpr = ((SQLMethodInvokeExpr) right); @@ -215,7 +220,7 @@ private static Method parseMethod(SQLExpr right) throws Exception { return retMethod; } - private static boolean segNoQuota(SQLMethodInvokeExpr methodInvokeExpr) { + private boolean segNoQuota(SQLMethodInvokeExpr methodInvokeExpr) { List params = methodInvokeExpr.getParameters(); if (params.get(0) instanceof SQLIdentifierExpr) { return true; @@ -223,7 +228,7 @@ private static boolean segNoQuota(SQLMethodInvokeExpr methodInvokeExpr) { return false; } - private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr, SQLExpr right, Method method) { + private void removeSegFun(SQLBinaryOpExpr binaryOpExpr, SQLExpr right, Method method) { //当seg内没有引号时,去掉seg() if (segNoQuota((SQLMethodInvokeExpr) right) && method.containSeg()) { method.setParentMethod(null); @@ -232,7 +237,7 @@ private static void removeSegFun(SQLBinaryOpExpr binaryOpExpr, SQLExpr right, Me } } - private static void generateNestedLeafNode(String methodName, SQLExpr pathName, SQLBinaryOpExpr binaryOpExpr) throws Exception { + private void generateNestedLeafNode(String methodName, SQLExpr pathName, SQLBinaryOpExpr binaryOpExpr) throws Exception { //right:a.b = "d" SQLExpr left = binaryOpExpr.getLeft(); SQLExpr right = binaryOpExpr.getRight(); @@ -244,7 +249,7 @@ private static void generateNestedLeafNode(String methodName, SQLExpr pathName, removeSegFun(binaryOpExpr, right, method); //seg(term("abc")) exception if (method.containSeg() && sourceTerm != null) { - String[] terms = Analyzer.analyzer(sourceTerm); + String[] terms = analyzer.analyzer(sourceTerm); //String[] terms = "a,b".split(","); String funName = method.getFunName(); List allNewNode = new ArrayList(); @@ -275,7 +280,7 @@ private static void generateNestedLeafNode(String methodName, SQLExpr pathName, } } - private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Exception { + private void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Exception { SQLObject sqlObject = sqlExpr.getParent(); if (sqlObject instanceof MySqlSelectQueryBlock) { ((MySqlSelectQueryBlock) sqlObject).setWhere(newExpr); @@ -295,7 +300,7 @@ private static void replaceOldNode(SQLExpr sqlExpr,SQLExpr newExpr) throws Excep } //构造新的二叉树替换原有节点 - private static void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExprs) throws Exception{ + private void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExprs) throws Exception{ int size = sqlExprs.size(); int andNum = size - 1; List allNode = new ArrayList(); @@ -323,7 +328,7 @@ private static void conNestedTree(SQLBinaryOpExpr retExpr, List sqlExpr } //对叶节点分词,构造新节点 - private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Exception { + private void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Exception { SQLExpr left = binaryOpExpr.getLeft(); SQLExpr right = binaryOpExpr.getRight(); SQLBinaryOperator operator = binaryOpExpr.getOperator(); @@ -334,7 +339,7 @@ private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Excepti removeSegFun(binaryOpExpr, right, method); //seg(term("abc")) exception if (method.containSeg() && sourceTerm != null) { - String[] terms = Analyzer.analyzer(sourceTerm); + String[] terms = analyzer.analyzer(sourceTerm); //String[] terms = "a,b".split(","); String funName = method.getFunName(); List allNewNode = new ArrayList(); @@ -357,7 +362,7 @@ private static void replaceLeafNode(SQLBinaryOpExpr binaryOpExpr) throws Excepti } //构造新的二叉树替换原有节点 - private static void conTree(SQLBinaryOpExpr retExpr, List SQLBinaryOpNode) { + private void conTree(SQLBinaryOpExpr retExpr, List SQLBinaryOpNode) { int size = SQLBinaryOpNode.size(); int andNum = size - 1; List allNode = new ArrayList(); @@ -384,7 +389,7 @@ private static void conTree(SQLBinaryOpExpr retExpr, List SQLBi //TODO 构造一个节点 - private static SQLBinaryOpExpr createOpNode(String filed, SQLExpr value, SQLBinaryOperator operator) { + private SQLBinaryOpExpr createOpNode(String filed, SQLExpr value, SQLBinaryOperator operator) { SQLBinaryOpExpr retWhere = new SQLBinaryOpExpr(); SQLIdentifierExpr ileft = new SQLIdentifierExpr(); ileft.setName(filed); @@ -395,7 +400,7 @@ private static SQLBinaryOpExpr createOpNode(String filed, SQLExpr value, SQLBina } //TODO 构造一个nested节点 - private static SQLMethodInvokeExpr createNestedNode(String name, SQLExpr pathName, SQLBinaryOpExpr sqlBinaryOpExpr) { + private SQLMethodInvokeExpr createNestedNode(String name, SQLExpr pathName, SQLBinaryOpExpr sqlBinaryOpExpr) { SQLMethodInvokeExpr sqlMethodInvokeExpr = new SQLMethodInvokeExpr(); sqlMethodInvokeExpr.setMethodName(name); sqlMethodInvokeExpr.addParameter(pathName); @@ -403,7 +408,7 @@ private static SQLMethodInvokeExpr createNestedNode(String name, SQLExpr pathNam return sqlMethodInvokeExpr; } - private static String printSql(MySqlSelectQueryBlock query) { + private String printSql(MySqlSelectQueryBlock query) { StringBuilder out = new StringBuilder(); MySqlOutputVisitor visitor = new MySqlOutputVisitor(out); query.accept0(visitor); diff --git a/src/main/java/org/nlpcn/es4sql/domain/Field.java b/src/main/java/org/nlpcn/es4sql/domain/Field.java index c11f7c54..b2c96ff7 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Field.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Field.java @@ -19,6 +19,7 @@ public class Field implements Cloneable{ private Where where; private String mode; private String sortName; + public Where getWhere() { return where; } diff --git a/src/main/java/org/nlpcn/es4sql/domain/Order.java b/src/main/java/org/nlpcn/es4sql/domain/Order.java index 98f8976c..27ccec6b 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Order.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Order.java @@ -27,6 +27,7 @@ public Order(boolean isNested, String mode, String path, this.name = name; this.type = type; } + public String getName() { return name; } diff --git a/src/main/java/org/nlpcn/es4sql/domain/Select.java b/src/main/java/org/nlpcn/es4sql/domain/Select.java index 673952af..62f4db1b 100644 --- a/src/main/java/org/nlpcn/es4sql/domain/Select.java +++ b/src/main/java/org/nlpcn/es4sql/domain/Select.java @@ -77,6 +77,7 @@ public void addOrderBy(String name, String type) { } this.orderBys.add(new Order(name, type)); } + //TODO new add public void addOrderBy(boolean isNested, String mode, String path, Where condition, String name, String type) { diff --git a/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java b/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java new file mode 100644 index 00000000..4dd6a81c --- /dev/null +++ b/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java @@ -0,0 +1,84 @@ +package org.elasticsesarch.plugin.nlpcn; + +import org.elasticsearch.plugin.nlpcn.preAnalyzer.Analyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +/** + * Created by linxueqing on 2016/12/13. + */ +public class SqlParseAnalyzerTest { + public class MockAnalyzer extends Analyzer { + @Override + public String[] analyzer(String term) throws Exception { + return new String[]{"大数据", "持续集成"}; + } + } + + @Test + public void withoutSegKeyword() throws Exception { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new MockAnalyzer()); + String inputSql = "select * from a"; + String expectedOutputSql = inputSql; + String outputSql = sqlParseAnalyzer.seg(inputSql); + assertEquals(expectedOutputSql, outputSql); + } + + @Test + public void segOneTerm() throws Exception { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new MockAnalyzer()); + String inputSql = "select * from a where name=seg(\"大数据持续集成\") and age > 18 order by age desc"; + String outputSql = sqlParseAnalyzer.seg(inputSql); + String expectedOutputSql = "SELECT *\n" + + "FROM a\n" + + "WHERE name = '大数据'\n" + + "\tAND name = '持续集成'\n" + + "\tAND age > 18\n" + + "ORDER BY age DESC"; + assertEquals(expectedOutputSql, outputSql); + } + + @Test + public void segOneNestedTerm() throws Exception { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new MockAnalyzer()); + String inputSql = "select * from a where nested(b, b.name=seg(\"大数据持续集成\")) and age > 18 order by age desc"; + String outputSql = sqlParseAnalyzer.seg(inputSql); + String expectedOutputSql = "SELECT *\n" + + "FROM a\n" + + "WHERE nested(b, b.name = '大数据')\n" + + "\tAND nested(b, b.name = '持续集成')\n" + + "\tAND age > 18\n" + + "ORDER BY age DESC"; + assertEquals(expectedOutputSql, outputSql); + } + + @Test + public void segNestedOrder() throws Exception { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new MockAnalyzer()); + String inputSql = "SELECT * FROM a where nested(b,b.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"大数据持续集成\")))"; + String outputSql = sqlParseAnalyzer.seg(inputSql); + String expectedOutputSql = "SELECT *\n" + + "FROM a\n" + + "WHERE nested(b, b.c = 'abc')\n" + + "ORDER BY nested(a, SUM(a.b), a.c = term('大数据')\n" + + "AND a.c = term('持续集成'))"; + assertEquals(expectedOutputSql, outputSql); + } + + @Test + public void segMoreThanOneTerm() throws Exception { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new MockAnalyzer()); + String inputSql = "SELECT * FROM test where nested(a,a.c=term(seg(\"大数据持续集成\")) and a.c=\"中国\") or nested(a,a.c=term(seg(\"大数据持续集成\")))"; + String outputSql = sqlParseAnalyzer.seg(inputSql); + String expectedOutputSql = "SELECT *\n" + + "FROM test\n" + + "WHERE nested(a, a.c = term('大数据'))\n" + + "\tAND nested(a, a.c = term('持续集成'))\n" + + "\tAND nested(a, a.c = '中国')\n" + + "\tOR nested(a, a.c = term('大数据'))\n" + + "\tAND nested(a, a.c = term('持续集成'))"; + assertEquals(expectedOutputSql, outputSql); + System.out.println(outputSql); + } +} diff --git a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java index 302d0603..3fd99ea8 100644 --- a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java +++ b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java @@ -1,5 +1,7 @@ package org.nlpcn.es4sql; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.Analyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.junit.Test; @@ -20,7 +22,9 @@ public void SqlParseAnalyzerTest(){ //String sql = "SELECT * FROM test where a is not null and b=\"xxx\" and nested(a,a.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"百度和谷歌\")))"; //String sql = "SELECT * FROM test where nested(a,a.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"百度和谷歌\")))"; //String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\"))) "; - String ret = SqlParseAnalyzer.seg(sql); + + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + String ret = sqlParseAnalyzer.seg(sql); System.out.println(ret); } catch (Exception e) { e.printStackTrace(); diff --git a/src/test/java/org/nlpcn/es4sql/SegTest.java b/src/test/java/org/nlpcn/es4sql/SegTest.java index d148b0dc..16ccf0fe 100644 --- a/src/test/java/org/nlpcn/es4sql/SegTest.java +++ b/src/test/java/org/nlpcn/es4sql/SegTest.java @@ -11,6 +11,7 @@ import com.alibaba.druid.sql.dialect.mysql.parser.MySqlExprParser; import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser; import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlOutputVisitor; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.nlpcn.es4sql.parse.ElasticLexer; @@ -22,27 +23,33 @@ /** * Created by fangbb on 2016-12-4. */ -public class Test { +public class SegTest { @Test public void StrTest() { //String sql = "select * from test where nested(info,info.name = term(seg(\"大数据云计算\")) and info.name=seg(\"python|Hbase|Hive\") or info.name=term(seg(\"Hadoop\"))) and city=seg(\"hah\") and province=\"河北省\" order by nested(info,sum(info.age), info.name=seg(\"java\") and info.name=terms(seg(\"python\")) and info.name=seg(\"Hadoop\")) desc,score desc"; String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; //System.out.println(sql); //sql = SqlSegment.seg(sql); - sql = SqlParseAnalyzer.seg(sql); + try { + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + sql = sqlParseAnalyzer.seg(sql); + } catch (Exception e) { + System.out.print(e); + } + //sql = SqlAnalyzer.seg(sql); System.out.println("-------------"); System.out.println(sql); System.out.println("-------------"); } - @org.junit.Test + @Test public void Str() throws Exception { String ip = InetAddress.getLocalHost().getHostAddress(); System.out.println(ip); } - @org.junit.Test + @Test public void SqlExprParser() { try { String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; @@ -59,7 +66,7 @@ public void SqlExprParser() { } - @org.junit.Test + @Test public void StatementParser() { String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; ElasticLexer lexer = new ElasticLexer(sql); @@ -75,7 +82,7 @@ public void StatementParser() { System.out.println(out.toString()); } - @org.junit.Test + @Test public void myStatementParser() { String sql = "select * from a-b where name=seg(\"大数据云计算\") and age > 18 order by age desc"; ElasticLexer lexer = new ElasticLexer(sql); From cf363512f35988f0698c723d96c1fc786e17961e Mon Sep 17 00:00:00 2001 From: fangbbcode Date: Wed, 14 Dec 2016 16:03:14 +0800 Subject: [PATCH 14/23] modify class name and remove [delete function] --- .../plugin/nlpcn/RestSqlAction.java | 6 +- .../elasticsearch/plugin/nlpcn/SqlPlug.java | 4 +- .../plugin/nlpcn/preAnalyzer/Analyzer.java | 4 +- ...zerPlugin.java => AnsjAnalyzerAction.java} | 4 +- ...nsjAnalyzer.java => AnsjAnalyzerImpl.java} | 2 +- .../plugin/nlpcn/preAnalyzer/SqlAnalyzer.java | 146 --------------- .../plugin/nlpcn/preAnalyzer/SqlSegment.java | 172 ------------------ .../nlpcn/es4sql/query/ESActionFactory.java | 10 +- .../plugin/nlpcn/SqlParseAnalyzerTest.java | 2 +- .../java/org/nlpcn/es4sql/PreAnalyzer.java | 5 +- src/test/java/org/nlpcn/es4sql/SegTest.java | 10 +- 11 files changed, 19 insertions(+), 346 deletions(-) rename src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/{AnsjAnalyzerPlugin.java => AnsjAnalyzerAction.java} (75%) rename src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/{AnsjAnalyzer.java => AnsjAnalyzerImpl.java} (97%) delete mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java delete mode 100644 src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java index 7c3d7c8f..9ec0fb38 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/RestSqlAction.java @@ -5,8 +5,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugin.nlpcn.executors.ActionRequestRestExecuterFactory; import org.elasticsearch.plugin.nlpcn.executors.RestExecutor; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerImpl; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.elasticsearch.rest.*; import org.nlpcn.es4sql.SearchDao; @@ -35,8 +34,7 @@ protected void handleRequest(RestRequest request, RestChannel channel, final Cli sql = request.content().toUtf8(); } //ananlyze - //sql = SqlAnalyzer.seg(sql); - SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzerImpl()); sql = sqlParseAnalyzer.seg(sql); SearchDao searchDao = new SearchDao(client); QueryAction queryAction= searchDao.explain(sql); diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java index 7e2890a6..a27350d0 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/SqlPlug.java @@ -3,7 +3,7 @@ import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.Module; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerPlugin; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerAction; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestModule; @@ -31,7 +31,7 @@ public Collection nodeModules() { public static class AnsjModule extends AbstractModule { @Override protected void configure() { - bind(AnsjAnalyzerPlugin.class).asEagerSingleton(); + bind(AnsjAnalyzerAction.class).asEagerSingleton(); } } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java index 0597f416..43f2da9a 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/Analyzer.java @@ -3,6 +3,6 @@ /** * Created by linxueqing on 2016/12/13. */ -public class Analyzer { - public String[] analyzer(String term) throws Exception {return new String[]{term};} +public interface Analyzer { + public String[] analyzer(String term) throws Exception; } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerAction.java similarity index 75% rename from src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java rename to src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerAction.java index d7d36262..0338c203 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerAction.java @@ -7,9 +7,9 @@ /** * Created by fangbb on 2016-12-6. */ -public class AnsjAnalyzerPlugin extends AbstractComponent { +public class AnsjAnalyzerAction extends AbstractComponent { @Inject - public AnsjAnalyzerPlugin(final Settings settings){ + public AnsjAnalyzerAction(final Settings settings){ super(settings); AnsjElasticConfigurator.init(settings); } diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java similarity index 97% rename from src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java rename to src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java index 55eb02c4..0c74cf89 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java @@ -8,7 +8,7 @@ /** * Created by fangbb on 2016-12-12. */ -public class AnsjAnalyzer extends Analyzer { +public class AnsjAnalyzerImpl implements Analyzer { public String[] analyzer(String term) throws Exception { //TODO done diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java deleted file mode 100644 index 6461f8d9..00000000 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlAnalyzer.java +++ /dev/null @@ -1,146 +0,0 @@ -package org.elasticsearch.plugin.nlpcn.preAnalyzer; - -import com.alibaba.druid.sql.SQLUtils; -import org.elasticsearch.plugin.nlpcn.request.HttpRequester; -import org.elasticsearch.plugin.nlpcn.request.HttpResponse; - -import java.net.URLEncoder; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Created by fangbb on 2016-11-25. - */ -public class SqlAnalyzer { - public static String seg(String sql) { - sql = SQLUtils.formatMySql(sql); - if (sql.contains("seg(")) { - String pattern = " (.*?) = (.*?)seg\\((.*?)\\)"; - Pattern p = Pattern.compile(pattern); - Matcher m = p.matcher(sql); - String pAll = ""; - String pName = ""; - String pFun = ""; - String pTerm = ""; - while (m.find()) { - pAll = m.group(); - pName = parseName(m.group(1)); - pFun = parseFun(m.group(2)); - pTerm = m.group(3); - if (pFun == null || pFun.equals("")) { - sql = replaceSql(sql, pAll, pName, pTerm); - } else { - sql = replaceSql(sql, pAll, pName, pFun, pTerm); - } - } - } - sql = SQLUtils.formatMySql(sql); - return sql.replace(" - ","-"); - } - - public static String replaceSql(String sql, String all, String name, String terms) { - String[] termsArr = analyzer(terms); - int size = termsArr.length - 1; - String newAll = ""; - StringBuffer conBuffer = new StringBuffer(); - //String source = ""; - String source = name + " = " + "seg(" + terms + ")"; - for (int i = 0; i <= size; i++) { - if (i == 0) { - conBuffer.append(name).append("=").append(termsArr[i]); - } else { - conBuffer.append(" and ").append(name).append("=").append(termsArr[i]); - } - } - newAll = all.replace(source, conBuffer.toString()); - sql = sql.replace(all, newAll); - return sql; - } - - public static String replaceSql(String sql, String all, String name, String fun, String terms) { - String[] termsArr = analyzer(terms); - int indexMax = termsArr.length - 1; - String newAll = ""; - StringBuffer conBuffer = new StringBuffer(); - String source = ""; - source = name + " = " + fun + "(" + "seg(" + terms + ")"; - for (int i = 0; i <= indexMax; i++) { - if (i == 0) { //第一个term - if (indexMax == 0) { //共一个term - conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]); - } else { - conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); - } - } else if (i == indexMax) { //多个term时,最后一个 - conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]); - } else { //中间的term - conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); - } - } - newAll = all.replace(source, conBuffer.toString()); - sql = sql.replace(all, newAll); - return sql; - } - - - public static String parseName(String name) { - String[] tmp = name.split(",| "); - int num = tmp.length; - name = tmp[num - 1]; - return name; - } - - public static String parseFun(String fun) { - if (fun == null || fun.equals("")) { - fun = ""; - } else { - fun = fun.replace("(", ""); - } - return fun; - } - - public static String[] analyzer(String term) { - //TODO done - term = term.replaceAll("'", ""); - HttpRequester request = new HttpRequester(); - HttpResponse response = null; - String sourceTerms = ""; -// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj - try { - String ip = AnsjElasticConfigurator.ES_IP; - String port = AnsjElasticConfigurator.ES_PORT; - String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; - String preUrl = "http://" + ip + ":" + port + midUrl; - //preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; - String enTerm = URLEncoder.encode(term, "UTF-8"); - String url = preUrl + enTerm; - response = request.sendGet(url); - if (response.getCode() == 200) { - if (response != null && response.getContent().length() > 10) { - sourceTerms = response.getContent(); - } - } - - } catch (Exception e) { - e.printStackTrace(); - } - return getTerms(sourceTerms).split(","); - } - - private static String getTerms(String sourceTerms) { - StringBuffer sb = new StringBuffer(); - String[] lines = sourceTerms.split("\n"); - int lineLen = lines.length; - for (int i = 0; i < lineLen; i++) { - String[] terms = lines[i].split("\t"); - String term = terms[0].trim(); - if (i == 0) { - sb.append("\"").append(term).append("\""); - } else { - sb.append(",\"").append(term).append("\""); - } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java deleted file mode 100644 index 0b0a4113..00000000 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlSegment.java +++ /dev/null @@ -1,172 +0,0 @@ -package org.elasticsearch.plugin.nlpcn.preAnalyzer; - -import com.alibaba.druid.sql.SQLUtils; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjElasticConfigurator; -import org.elasticsearch.plugin.nlpcn.request.HttpRequester; -import org.elasticsearch.plugin.nlpcn.request.HttpResponse; - -import java.net.URLEncoder; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Created by fangbb on 2016-11-25. - */ -public class SqlSegment { - public static String seg(String sql) { - if (sql.contains("seg(")){ - String pattern = "( .*?)=(.*?)seg\\((.*?)\\)"; - Pattern p = Pattern.compile(pattern); - Matcher m = p.matcher(sql); - String pAll = ""; - String pName = ""; - String pFun = ""; - String pTerm = ""; - //System.out.println(sql); - while (m.find()) { - pAll = m.group(); - pName = m.group(1); - pFun = m.group(2); - pTerm = m.group(3); - System.out.println("========"); - System.out.println(pAll); - if (pFun.contains("=")) { - String pNew = pFun + "seg(" + pTerm + ")"; - // System.out.println(pNew); - Matcher pm = p.matcher(pNew); - pm.find(); - pAll = pm.group(); - pName = pm.group(1); - pFun = pm.group(2); - pTerm = pm.group(3); - } - pName = parseName(pName); - pFun = parseFun(pFun); - if (pFun == null || pFun.equals("")) { - sql = replaceSql(sql, pAll, pName, pTerm); - } else { - sql = replaceSql(sql, pAll, pName, pFun, pTerm); - } - } - } - return sql; - } - - public static String replaceSql(String sql, String all, String name, String terms) { - String[] termsArr = analyzer(terms); - int size = termsArr.length - 1; - String newAll = ""; - StringBuffer conBuffer = new StringBuffer(); - //String source = ""; - String source = name + "=" + "seg(" + terms + ")"; - for (int i = 0; i <= size; i++) { - if (i == 0) { - conBuffer.append(name).append("=").append(termsArr[i]); - } else { - conBuffer.append(" and ").append(name).append("=").append(termsArr[i]); - } - } - newAll = all.replace(source, conBuffer.toString()); - sql = sql.replace(all, newAll); - return sql; - } - - public static String replaceSql(String sql, String all, String name, String fun, String terms) { - String[] termsArr = analyzer(terms); - int indexMax = termsArr.length - 1; - String newAll = ""; - //String conditions = ""; - StringBuffer conBuffer = new StringBuffer(); - String source = ""; - source = name + "=" + fun + "(" + "seg(" + terms + ")"; - for (int i = 0; i <= indexMax; i++) { - if (i == 0) { //第一个term - if (indexMax == 0) { //共一个term - conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]); - } else { - conBuffer.append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); - } - } else if (i == indexMax) { //多个term时,最后一个 - conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]); - } else { //中间的term - conBuffer.append(" and ").append(name).append("=").append(fun).append("(").append(termsArr[i]).append(")"); - } - } - newAll = all.replace(source, conBuffer.toString()); - sql = sql.replace(all, newAll); - return sql; - } - - - public static String parseName(String name) { - String[] tmp = name.split(",| "); - int num = tmp.length; - name = tmp[num - 1]; - return name; - } - - public static String parseFun(String fun) { - if (fun == null || fun.equals("")) { - fun = ""; - } else { - fun = fun.replace("(", ""); - } - return fun; - } - - public static String parseTerm(String term) { - if (term != null || term.equals("")) { - term = term.replace("\"", ""); - } - return term; - } - - public static String[] analyzer(String term) { - //TODO done - term = term.replaceAll("\"", ""); - HttpRequester request = new HttpRequester(); - HttpResponse response = null; - String sourceTerms = ""; -// http://192.168.25.11:9688/_cat/analyze?text=大数据&analyzer=query_ansj - try { - //String ip = InetAddress.getLocalHost().getHostAddress(); - String ip = AnsjElasticConfigurator.ES_IP; - String port = AnsjElasticConfigurator.ES_PORT; - String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; - String preUrl = "http://" + ip + ":" + port + midUrl; - //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; - System.out.println(preUrl); - String enTerm = URLEncoder.encode(term, "UTF-8"); - String url = preUrl + enTerm; - System.out.println(url); - response = request.sendGet(url); - if (response.getCode() == 200) { - if (response != null && response.getContent().length() > 10) { - sourceTerms = response.getContent(); - } - } - - } catch (Exception e) { - e.printStackTrace(); - } - return getTerms(sourceTerms).split(","); - } - - private static String getTerms(String sourceTerms) { - StringBuffer sb = new StringBuffer(); - String[] lines = sourceTerms.split("\n"); - int lineLen = lines.length; - for (int i = 0; i < lineLen; i++) { - String[] terms = lines[i].split("\t"); - String term = terms[0].trim(); - int size = terms.length; - if (i == 0) { - sb.append("\"").append(term).append("\""); - } else { - sb.append(",\"").append(term).append("\""); - } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java b/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java index 3618951f..0e31c6b7 100644 --- a/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java +++ b/src/main/java/org/nlpcn/es4sql/query/ESActionFactory.java @@ -52,11 +52,11 @@ public static QueryAction create(Client client, String sql) throws SqlParseExcep handleSubQueries(client, select); return handleSelect(client, select); } - case "DELETE": - SQLStatementParser parser = createSqlStatementParser(sql); - SQLDeleteStatement deleteStatement = parser.parseDeleteStatement(); - Delete delete = new SqlParser().parseDelete(deleteStatement); - return new DeleteQueryAction(client, delete); +// case "DELETE": +// SQLStatementParser parser = createSqlStatementParser(sql); +// SQLDeleteStatement deleteStatement = parser.parseDeleteStatement(); +// Delete delete = new SqlParser().parseDelete(deleteStatement); +// return new DeleteQueryAction(client, delete); case "SHOW": return new ShowQueryAction(client, sql); default: diff --git a/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java b/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java index 4dd6a81c..2f9e3aed 100644 --- a/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java +++ b/src/test/java/org/elasticsesarch/plugin/nlpcn/SqlParseAnalyzerTest.java @@ -9,7 +9,7 @@ * Created by linxueqing on 2016/12/13. */ public class SqlParseAnalyzerTest { - public class MockAnalyzer extends Analyzer { + public class MockAnalyzer implements Analyzer { @Override public String[] analyzer(String term) throws Exception { return new String[]{"大数据", "持续集成"}; diff --git a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java index 3fd99ea8..8fdc446f 100644 --- a/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java +++ b/src/test/java/org/nlpcn/es4sql/PreAnalyzer.java @@ -1,7 +1,6 @@ package org.nlpcn.es4sql; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.Analyzer; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerImpl; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.junit.Test; @@ -23,7 +22,7 @@ public void SqlParseAnalyzerTest(){ //String sql = "SELECT * FROM test where nested(a,a.c=\"abc\") order by nested(a,sum(a.b),a.c=term(seg(\"百度和谷歌\")))"; //String sql = "SELECT * FROM test where nested(a,a.c=term(seg(\"百度和谷歌\"))) "; - SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzerImpl()); String ret = sqlParseAnalyzer.seg(sql); System.out.println(ret); } catch (Exception e) { diff --git a/src/test/java/org/nlpcn/es4sql/SegTest.java b/src/test/java/org/nlpcn/es4sql/SegTest.java index 16ccf0fe..f2b5d5d5 100644 --- a/src/test/java/org/nlpcn/es4sql/SegTest.java +++ b/src/test/java/org/nlpcn/es4sql/SegTest.java @@ -1,18 +1,14 @@ package org.nlpcn.es4sql; -import com.alibaba.druid.sql.SQLUtils; import com.alibaba.druid.sql.ast.SQLExpr; -import com.alibaba.druid.sql.ast.SQLObject; import com.alibaba.druid.sql.ast.SQLOrderBy; import com.alibaba.druid.sql.ast.SQLStatement; import com.alibaba.druid.sql.ast.expr.SQLQueryExpr; -import com.alibaba.druid.sql.ast.statement.SQLSelectStatement; import com.alibaba.druid.sql.dialect.mysql.ast.statement.MySqlSelectQueryBlock; import com.alibaba.druid.sql.dialect.mysql.parser.MySqlExprParser; import com.alibaba.druid.sql.dialect.mysql.parser.MySqlStatementParser; import com.alibaba.druid.sql.dialect.mysql.visitor.MySqlOutputVisitor; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzer; -import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlAnalyzer; +import org.elasticsearch.plugin.nlpcn.preAnalyzer.AnsjAnalyzerImpl; import org.elasticsearch.plugin.nlpcn.preAnalyzer.SqlParseAnalyzer; import org.nlpcn.es4sql.parse.ElasticLexer; import org.nlpcn.es4sql.parse.ElasticSqlExprParser; @@ -29,15 +25,13 @@ public void StrTest() { //String sql = "select * from test where nested(info,info.name = term(seg(\"大数据云计算\")) and info.name=seg(\"python|Hbase|Hive\") or info.name=term(seg(\"Hadoop\"))) and city=seg(\"hah\") and province=\"河北省\" order by nested(info,sum(info.age), info.name=seg(\"java\") and info.name=terms(seg(\"python\")) and info.name=seg(\"Hadoop\")) desc,score desc"; String sql = "select * from a where name=seg(\"大数据云计算\") and age > 18 order by age desc"; //System.out.println(sql); - //sql = SqlSegment.seg(sql); try { - SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzer()); + SqlParseAnalyzer sqlParseAnalyzer = new SqlParseAnalyzer(new AnsjAnalyzerImpl()); sql = sqlParseAnalyzer.seg(sql); } catch (Exception e) { System.out.print(e); } - //sql = SqlAnalyzer.seg(sql); System.out.println("-------------"); System.out.println(sql); System.out.println("-------------"); From c0bf9442c37d94743e46102da65f8f0c137e4c79 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Wed, 14 Dec 2016 17:01:34 +0800 Subject: [PATCH 15/23] fix exception --- .../plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java index 17266fda..0af7031f 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/SqlParseAnalyzer.java @@ -72,7 +72,7 @@ private void parseOrderBys(SQLOrderBy sqlOrderBy) throws Exception { if (nested.equals("nested") && params.size() == 3) { parseWhere(params.get(2)); } else { - new Exception("Nested sorting must be 3 parameters"); + throw new Exception("Nested sorting must be 3 parameters"); } } } @@ -120,6 +120,9 @@ private void replaceNestedLeafNode(SQLExpr sqlExpr) throws Exception { //TODO 解析nested的叶节点,返回新构造的叶节点 private SQLExpr parseNested(SQLExpr sqlExpr) throws Exception { String methodName = ((SQLMethodInvokeExpr) sqlExpr).getMethodName(); + if (((SQLMethodInvokeExpr) sqlExpr).getParameters().size() != 2) { + throw new Exception("Nested query must be 2 parameters"); + } SQLExpr pathName = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(0); SQLExpr where = ((SQLMethodInvokeExpr) sqlExpr).getParameters().get(1); SQLExpr retExpr = null; From 9d35fbb9bbbb2c67e8e84c9c405496326f1ffeb5 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Wed, 14 Dec 2016 19:22:42 +0800 Subject: [PATCH 16/23] doc --- es-sql.md | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 es-sql.md diff --git a/es-sql.md b/es-sql.md new file mode 100644 index 00000000..2af83aa9 --- /dev/null +++ b/es-sql.md @@ -0,0 +1,176 @@ +# es-sql + +## 编译和安装 + +### 编译 + + mvn clean package assembly:single -DskipTests + +### 安装方法: + + unzip elasticsearch-sql-2.3.1.1.zip + mv elasticsearch-sql-2.3.1.1 sql + rm elasticsearch-sql-2.3.1.1.zip + +或者 + + ./bin/plugin -u file:///home/omershelef/IdeaProjects/elasticsearch-sql/target/elasticsearch-sql-1.3.2.zip --install sql + +### es启动方法: + + ../bin/elasticsearch -Des.security.manager.enabled=false + + +## 特性 + +### SQL 特性 + + * SQL Select + * SQL Where + * SQL Order By + * SQL Group By + * SQL AND & OR + * SQL Like + * SQL COUNT distinct + * SQL In + * SQL Between + * SQL Aliases + * SQL Not Null + * SQL(ES) Date + * SQL avg() + * SQL count() + * SQL last() + * SQL max() + * SQL min() + * SQL sum() + * SQL Nulls + * SQL isnull() + * SQL now() + + +## 增强SQL特性 + + * ES nested + * ES seg + * ES TopHits + * ES MISSING + * ES STATS + * ES GEO_INTERSECTS + * ES GEO_BOUNDING_BOX + * ES GEO_DISTANCE + * ES GEOHASH_GRID aggregation + + +## 接口说明: + +### 解析sql 2 sql 接口: + + http://192.168.25.11:9688/_sql/_seg?sql=select * from test where province="河北省" + + +### 解析sql 2 es 接口: + + http://localhost:9200/_sql/_explain?sql=select * from indexName limit 10 + +### 执行sql地址: + + http://localhost:9200/_sql?sql=select * from indexName limit 10 + +### UI + + http://192.168.25.11:9688/_plugin/sql/ + +## 例子: + +### 不使用函数 + +当不使用任何的函数时,默认是使用的match查询,match则会进行分析器处理,分析器中的分词器会将搜索关键字分割成单独的词(terms)或者标记(tokens) 。 +该match的type是phrase,phrase表示确切的匹配若干个单词或短语, 如title: “brown dog”, 则查询title中包含brown和dog, 且两个是连接在一起的 + +例子: + + SELECT * FROM test_csdn_user_profile_12_201512_v4 where title = "brown dog" + +### matchQuery() + +matchQuery()使用的type是boolean。 +函数中可指定两个参数,不可指定operator和minimum_should_match,除了可以指定分词器外,和不使用函数没有区别: + +* 第一个参数是:查询词 +* 第二个参数是:analyzer,有三个值可供选择:query_ansj、dic_ansj、index_ansj + +例子: + + select * from user_metric where province = matchQuery("河北") + + +### term() + +term查找时内容精确匹配,只有一个参数,即需要查询的词 + +例子: + + select * from user_metric where province = term("河北") + +### IN_TERMS() + +在函数中可以指定多个词,进行查询 + +例子: + + SELECT * FROM test_csdn_user_profile_12_201512_v4 where province = IN_TERMS("河北省","北京市") + + +### 根据Id查询 IDS_QUERY() + +可指定多个id进行查询,函数中第一个参数是type,后边是需要指定的id,对于type加不加双引号均可 + +例子: + + select * from %s/dog where _id = IDS_QUERY(dog,1,2,3) + SELECT * FROM test_csdn_user_profile_12_201512_v4 where _id = IDS_QUERY("user","azjw1989","jslp1990") + +### 模糊匹配 like + +* *和%代表任意个字符(包括空字符) +* ?问号是单个字符 +请注意,此查询可能很慢,因为它需要迭代许多项。 为了防止极慢的通配符查询,通配符术语不应以通配符*或?开头。 + +例子: + + SELECT * FROM user_metric WHERE province LIKE "邯郸%%" + +### in + +使用match进行多个值查询,各个值间是或的关系 + +例子: + + SELECT * FROM user_profile_12_201512_v4 where province in ("河北省","北京市") + +### 嵌套类型 nested() + +nested方法可在where和order by中使用。 +在where中有两个参数: +* 第一个参数是:父field +* 第二个参数是:子filed的表达式 + +在order by中有三个参数: +* 第一个参数是:父field +* 第二个参数是:指定排序字段,以及排序函数,可使用sum、min、max、avg四个函数 +* 第三个参数是:子filed的表达式 + +例子: + + SELECT * FROM elasticsearch-sql_test_index where nested(message,message.info=term("c")) and nested(message,message.info=term("a")) order by nested(message, sum(message.dayOfWeek),message.info=term("a") and message.info=term("c")) desc + +### object对象 + +非嵌套的object,直接使用 父field.子field 即可查询 + +例子: + SELECT * FROM elasticsearch-sql_test_index where message.info=term("c") + + +http://192.168.25.11:9688/_sql/_seg?sql=select%20*%20from%20awhere nested(a,(a.b="x" or a.b=seg"y")and a.b=seg"z") + From 10d0f4ae0aa80007cae828f89f19f8c632a42413 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 15 Dec 2016 15:17:38 +0800 Subject: [PATCH 17/23] add new function for nesting sort --- .../nlpcn/es4sql/query/maker/SortMaker.java | 25 ++++++++++++++++++- .../java/org/nlpcn/es4sql/NestedSortTest.java | 6 +++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java index db4a062e..7ac3ef10 100644 --- a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java +++ b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java @@ -1,5 +1,6 @@ package org.nlpcn.es4sql.query.maker; +import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -46,6 +47,7 @@ public static SortBuilder explan(Order order) { public static void setCondition(List conditions, BoolQueryBuilder queryBuilder) { String key = ""; String val = ""; + String methodName = ""; for (Where con : conditions) { if (con instanceof Condition) { key = ((Condition) con).getName(); @@ -53,10 +55,17 @@ public static void setCondition(List conditions, BoolQueryBuilder queryBu if (vals.getClass().isArray()) { Object[] md = (Object[]) vals; val = md[0].toString(); + if (((Condition) con).getOpear().name().equals("TERM")){ + methodName = "term"; + } + } else if (vals instanceof SQLMethodInvokeExpr) { + methodName = ((SQLMethodInvokeExpr) vals).getMethodName(); + val = ((SQLMethodInvokeExpr) vals).getParameters().get(0).toString(); } else { + methodName = ""; val = vals.toString(); } - queryBuilder = explanSort(queryBuilder, key, val); + queryBuilder = explanSort(queryBuilder, methodName, key, val); } else if (con instanceof Where) { List conWhere = con.getWheres(); setCondition(conWhere, queryBuilder); @@ -65,6 +74,20 @@ public static void setCondition(List conditions, BoolQueryBuilder queryBu } } + public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String methodName, String key, String value) { + if (methodName.equals("")) { + QueryBuilder termQueryBuilder = QueryBuilders.matchPhraseQuery(key, value); + queryBuilder = queryBuilder.should(termQueryBuilder); + } else if (methodName.equals("matchQuery")) { + QueryBuilder termQueryBuilder = QueryBuilders.matchQuery(key, value); + queryBuilder = queryBuilder.should(termQueryBuilder); + } else if (methodName.equals("term")) { + QueryBuilder termQueryBuilder = QueryBuilders.termQuery(key, value); + queryBuilder = queryBuilder.should(termQueryBuilder); + } + return queryBuilder; + } + public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String key, String value) { QueryBuilder termQueryBuilder = QueryBuilders.termQuery(key, value); queryBuilder = queryBuilder.should(termQueryBuilder); diff --git a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java index 2cf2ccee..8b5041b2 100644 --- a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java +++ b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java @@ -20,8 +20,10 @@ public void nestedSortTest(){ try { TransportClient client; client = TransportClient.builder().addPlugin(DeleteByQueryPlugin.class).build().addTransportAddress(getTransportAddress()); - //String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=\"java\" and info.name=\"python\" and info.name=\"Hadoop\") desc,score desc"; - String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"hadoop\") and a.c=term(\"百度和谷歌\") or a.c=term(\"test\") or a.c=term(\"test2\"))"; + String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=matchQuery(\"java\") and info.name=term(\"python\") and info.name=\"Hadoop\") desc,score desc"; + //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=matchQuery(\"百度和谷歌\"))"; + //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"百度和谷歌\") and a.c=term(\"test\"))"; + //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=\"百度和谷歌\")"; //String sql = "SELECT * FROM test where nested(calc_skill_blog,calc_skill_blog.keyword=\"java\")"; Long now = System.currentTimeMillis(); SearchDao searchDao = new SearchDao(client); From d977e29a7641a41ab5f76a75922a61f79c7fc4d2 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 15 Dec 2016 15:33:57 +0800 Subject: [PATCH 18/23] add unit test 4 nesting sort --- .../java/org/nlpcn/es4sql/NestedSortTest.java | 186 ++++++++++++++++-- 1 file changed, 175 insertions(+), 11 deletions(-) diff --git a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java index 8b5041b2..667f6c8a 100644 --- a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java +++ b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java @@ -3,6 +3,7 @@ import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.transport.InetSocketTransportAddress; import org.elasticsearch.plugin.deletebyquery.DeleteByQueryPlugin; +import org.junit.Assert; import org.junit.Test; import org.nlpcn.es4sql.query.QueryAction; import org.nlpcn.es4sql.query.SqlElasticRequestBuilder; @@ -17,28 +18,191 @@ public class NestedSortTest { @Test public void nestedSortTest(){ + String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=matchQuery(\"java\") and info.name=term(\"python\") and info.name=\"Hadoop\") desc,score desc"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"nested\" : {\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : [ {\n" + + " \"bool\" : {\n" + + " \"must\" : [ {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"java\"\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"python\"\n" + + " }\n" + + " } ]\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"Hadoop\"\n" + + " }\n" + + " } ]\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"path\" : \"info\"\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"sort\" : [ {\n" + + " \"info.age\" : {\n" + + " \"order\" : \"desc\",\n" + + " \"missing\" : \"_last\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : [ {\n" + + " \"match\" : {\n" + + " \"info.name\" : {\n" + + " \"query\" : \"'java'\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"python\"\n" + + " }\n" + + " }, {\n" + + " \"match\" : {\n" + + " \"info.name\" : {\n" + + " \"query\" : \"Hadoop\",\n" + + " \"type\" : \"phrase\"\n" + + " }\n" + + " }\n" + + " } ]\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"info\"\n" + + " }\n" + + " }, {\n" + + " \"score\" : {\n" + + " \"order\" : \"desc\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + @Test + public void nestedSortTest2(){ + String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=matchQuery(\"百度和谷歌\"))"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"sort\" : [ {\n" + + " \"a.b\" : {\n" + + " \"order\" : \"asc\",\n" + + " \"missing\" : \"_first\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : {\n" + + " \"match\" : {\n" + + " \"a.c\" : {\n" + + " \"query\" : \"'百度和谷歌'\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"a\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + + @Test + public void nestedSortTest3(){ + String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"百度和谷歌\"))"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"sort\" : [ {\n" + + " \"a.b\" : {\n" + + " \"order\" : \"asc\",\n" + + " \"missing\" : \"_first\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : {\n" + + " \"term\" : {\n" + + " \"a.c\" : \"百度和谷歌\"\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"a\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + + @Test + public void nestedSortTest4(){ + String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=\"百度和谷歌\")"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"sort\" : [ {\n" + + " \"a.b\" : {\n" + + " \"order\" : \"asc\",\n" + + " \"missing\" : \"_first\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : {\n" + + " \"match\" : {\n" + + " \"a.c\" : {\n" + + " \"query\" : \"百度和谷歌\",\n" + + " \"type\" : \"phrase\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"a\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + + public static String exec(String sql){ + String jsonExplanation = ""; try { TransportClient client; client = TransportClient.builder().addPlugin(DeleteByQueryPlugin.class).build().addTransportAddress(getTransportAddress()); - String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=matchQuery(\"java\") and info.name=term(\"python\") and info.name=\"Hadoop\") desc,score desc"; - //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=matchQuery(\"百度和谷歌\"))"; - //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"百度和谷歌\") and a.c=term(\"test\"))"; - //String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=\"百度和谷歌\")"; - //String sql = "SELECT * FROM test where nested(calc_skill_blog,calc_skill_blog.keyword=\"java\")"; - Long now = System.currentTimeMillis(); + //Long now = System.currentTimeMillis(); SearchDao searchDao = new SearchDao(client); QueryAction queryAction = searchDao.explain(sql); SqlElasticRequestBuilder xx = queryAction.explain(); - String jsonExplanation = xx.explain(); - //String jsonExplanation = queryAction.explain().explain(); - System.out.println(System.currentTimeMillis() - now + " ms"); - System.out.println(jsonExplanation); + jsonExplanation = xx.explain(); + //System.out.println(System.currentTimeMillis() - now + " ms"); + //System.out.println(jsonExplanation); } catch (Exception e) { e.printStackTrace(); } + return jsonExplanation; } - public static InetSocketTransportAddress getTransportAddress() throws UnknownHostException { String host = "192.168.25.11"; From b7fc5bb4eee34de4323e7b49dcf3d8315618279d Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 15 Dec 2016 15:51:57 +0800 Subject: [PATCH 19/23] url --- .../plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java index 0c74cf89..289e3793 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjAnalyzerImpl.java @@ -21,8 +21,8 @@ public String[] analyzer(String term) throws Exception { String ip = AnsjElasticConfigurator.ES_IP; String port = AnsjElasticConfigurator.ES_PORT; String midUrl = "/_cat/analyze?analyzer=query_ansj&text="; - //String preUrl = "http://" + ip + ":" + port + midUrl; - String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; + String preUrl = "http://" + ip + ":" + port + midUrl; + //String preUrl = "http://192.168.25.11:9688/_cat/analyze?analyzer=query_ansj&text="; //System.out.println(preUrl); String enTerm = URLEncoder.encode(term, "UTF-8"); String url = preUrl + enTerm; From d21fd687ae7dcb1c6bbc1887bbf04b0a814fd095 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 15 Dec 2016 16:20:05 +0800 Subject: [PATCH 20/23] modify unit test 4 nesting sort --- .../java/org/nlpcn/es4sql/NestedSortTest.java | 251 ++++++++++++------ 1 file changed, 169 insertions(+), 82 deletions(-) diff --git a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java index 667f6c8a..35b2bb0a 100644 --- a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java +++ b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java @@ -17,86 +17,7 @@ */ public class NestedSortTest { @Test - public void nestedSortTest(){ - String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=matchQuery(\"java\") and info.name=term(\"python\") and info.name=\"Hadoop\") desc,score desc"; - String actual = exec(sql); - String expected = "{\n" + - " \"from\" : 0,\n" + - " \"size\" : 200,\n" + - " \"query\" : {\n" + - " \"bool\" : {\n" + - " \"must\" : {\n" + - " \"nested\" : {\n" + - " \"query\" : {\n" + - " \"bool\" : {\n" + - " \"must\" : {\n" + - " \"bool\" : {\n" + - " \"should\" : [ {\n" + - " \"bool\" : {\n" + - " \"must\" : [ {\n" + - " \"term\" : {\n" + - " \"info.name\" : \"java\"\n" + - " }\n" + - " }, {\n" + - " \"term\" : {\n" + - " \"info.name\" : \"python\"\n" + - " }\n" + - " } ]\n" + - " }\n" + - " }, {\n" + - " \"term\" : {\n" + - " \"info.name\" : \"Hadoop\"\n" + - " }\n" + - " } ]\n" + - " }\n" + - " }\n" + - " }\n" + - " },\n" + - " \"path\" : \"info\"\n" + - " }\n" + - " }\n" + - " }\n" + - " },\n" + - " \"sort\" : [ {\n" + - " \"info.age\" : {\n" + - " \"order\" : \"desc\",\n" + - " \"missing\" : \"_last\",\n" + - " \"mode\" : \"SUM\",\n" + - " \"nested_filter\" : {\n" + - " \"bool\" : {\n" + - " \"should\" : [ {\n" + - " \"match\" : {\n" + - " \"info.name\" : {\n" + - " \"query\" : \"'java'\",\n" + - " \"type\" : \"boolean\"\n" + - " }\n" + - " }\n" + - " }, {\n" + - " \"term\" : {\n" + - " \"info.name\" : \"python\"\n" + - " }\n" + - " }, {\n" + - " \"match\" : {\n" + - " \"info.name\" : {\n" + - " \"query\" : \"Hadoop\",\n" + - " \"type\" : \"phrase\"\n" + - " }\n" + - " }\n" + - " } ]\n" + - " }\n" + - " },\n" + - " \"nested_path\" : \"info\"\n" + - " }\n" + - " }, {\n" + - " \"score\" : {\n" + - " \"order\" : \"desc\"\n" + - " }\n" + - " } ]\n" + - "}"; - Assert.assertEquals(expected,actual); - } - @Test - public void nestedSortTest2(){ + public void nestedSort4MatchQueryTest(){ String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=matchQuery(\"百度和谷歌\"))"; String actual = exec(sql); String expected = "{\n" + @@ -127,7 +48,7 @@ public void nestedSortTest2(){ } @Test - public void nestedSortTest3(){ + public void nestedSort4TermTest(){ String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=term(\"百度和谷歌\"))"; String actual = exec(sql); String expected = "{\n" + @@ -155,7 +76,7 @@ public void nestedSortTest3(){ } @Test - public void nestedSortTest4(){ + public void nestedSortDefaultTest(){ String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=\"百度和谷歌\")"; String actual = exec(sql); String expected = "{\n" + @@ -184,6 +105,172 @@ public void nestedSortTest4(){ "}"; Assert.assertEquals(expected,actual); } + @Test + public void nestedSortRealTest(){ + String sql = "select * from test where nested(info,info.name=term(\"java\") and info.name=term(\"python\") or info.name=term(\"Hadoop\")) order by nested(info,sum(info.age),info.name=matchQuery(\"java\") and info.name=term(\"python\") and info.name=\"Hadoop\") desc,score desc"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"nested\" : {\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : [ {\n" + + " \"bool\" : {\n" + + " \"must\" : [ {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"java\"\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"python\"\n" + + " }\n" + + " } ]\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"Hadoop\"\n" + + " }\n" + + " } ]\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"path\" : \"info\"\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"sort\" : [ {\n" + + " \"info.age\" : {\n" + + " \"order\" : \"desc\",\n" + + " \"missing\" : \"_last\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : [ {\n" + + " \"match\" : {\n" + + " \"info.name\" : {\n" + + " \"query\" : \"'java'\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }, {\n" + + " \"term\" : {\n" + + " \"info.name\" : \"python\"\n" + + " }\n" + + " }, {\n" + + " \"match\" : {\n" + + " \"info.name\" : {\n" + + " \"query\" : \"Hadoop\",\n" + + " \"type\" : \"phrase\"\n" + + " }\n" + + " }\n" + + " } ]\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"info\"\n" + + " }\n" + + " }, {\n" + + " \"score\" : {\n" + + " \"order\" : \"desc\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + + @Test + public void nestedSortFactTest(){ + String sql = "SELECT * FROM fact_index where nested(interest_keyword,interest_keyword.keyword=matchQuery(\"黑白\")) and nested(interest_keyword,interest_keyword.keyword=matchQuery(\"纪实\")) and province = \"北京\" order by nested(interest_keyword, sum(interest_keyword.score),interest_keyword.keyword=matchQuery(\"黑白\") and interest_keyword.keyword=matchQuery(\"纪实\")) desc"; + String actual = exec(sql); + String expected = "{\n" + + " \"from\" : 0,\n" + + " \"size\" : 200,\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : [ {\n" + + " \"nested\" : {\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"match\" : {\n" + + " \"interest_keyword.keyword\" : {\n" + + " \"query\" : \"黑白\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"path\" : \"interest_keyword\"\n" + + " }\n" + + " }, {\n" + + " \"nested\" : {\n" + + " \"query\" : {\n" + + " \"bool\" : {\n" + + " \"must\" : {\n" + + " \"match\" : {\n" + + " \"interest_keyword.keyword\" : {\n" + + " \"query\" : \"纪实\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"path\" : \"interest_keyword\"\n" + + " }\n" + + " }, {\n" + + " \"match\" : {\n" + + " \"province\" : {\n" + + " \"query\" : \"北京\",\n" + + " \"type\" : \"phrase\"\n" + + " }\n" + + " }\n" + + " } ]\n" + + " }\n" + + " }\n" + + " }\n" + + " },\n" + + " \"sort\" : [ {\n" + + " \"interest_keyword.score\" : {\n" + + " \"order\" : \"desc\",\n" + + " \"missing\" : \"_last\",\n" + + " \"mode\" : \"SUM\",\n" + + " \"nested_filter\" : {\n" + + " \"bool\" : {\n" + + " \"should\" : [ {\n" + + " \"match\" : {\n" + + " \"interest_keyword.keyword\" : {\n" + + " \"query\" : \"'黑白'\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " }, {\n" + + " \"match\" : {\n" + + " \"interest_keyword.keyword\" : {\n" + + " \"query\" : \"'纪实'\",\n" + + " \"type\" : \"boolean\"\n" + + " }\n" + + " }\n" + + " } ]\n" + + " }\n" + + " },\n" + + " \"nested_path\" : \"interest_keyword\"\n" + + " }\n" + + " } ]\n" + + "}"; + Assert.assertEquals(expected,actual); + } + public static String exec(String sql){ String jsonExplanation = ""; From 26b440a82d80249fbe60d599963f3abb6162a296 Mon Sep 17 00:00:00 2001 From: gitxxx Date: Thu, 15 Dec 2016 18:19:25 +0800 Subject: [PATCH 21/23] fix type bug --- .../nlpcn/es4sql/query/maker/SortMaker.java | 19 +++++++++++++++++-- .../java/org/nlpcn/es4sql/NestedSortTest.java | 5 +++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java index 7ac3ef10..60d63cfb 100644 --- a/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java +++ b/src/main/java/org/nlpcn/es4sql/query/maker/SortMaker.java @@ -1,6 +1,11 @@ package org.nlpcn.es4sql.query.maker; +import com.alibaba.druid.sql.ast.SQLExpr; +import com.alibaba.druid.sql.ast.expr.SQLCharExpr; +import com.alibaba.druid.sql.ast.expr.SQLIdentifierExpr; import com.alibaba.druid.sql.ast.expr.SQLMethodInvokeExpr; +import com.alibaba.druid.sql.ast.expr.SQLTextLiteralExpr; +import com.alibaba.druid.sql.visitor.functions.Char; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -59,9 +64,11 @@ public static void setCondition(List conditions, BoolQueryBuilder queryBu methodName = "term"; } } else if (vals instanceof SQLMethodInvokeExpr) { + //matchQuery methodName = ((SQLMethodInvokeExpr) vals).getMethodName(); - val = ((SQLMethodInvokeExpr) vals).getParameters().get(0).toString(); + val = getValue(((SQLMethodInvokeExpr) vals).getParameters().get(0)); } else { + //match_phrase methodName = ""; val = vals.toString(); } @@ -73,7 +80,15 @@ public static void setCondition(List conditions, BoolQueryBuilder queryBu } } - + public static String getValue(SQLExpr sqlExpr){ + String retStr=""; + if (sqlExpr instanceof SQLCharExpr) { + retStr = ((SQLCharExpr) sqlExpr).getValue().toString(); + } else if (sqlExpr instanceof SQLIdentifierExpr) { + retStr = ((SQLIdentifierExpr) sqlExpr).getName().toString(); + } + return retStr; + } public static BoolQueryBuilder explanSort(BoolQueryBuilder queryBuilder, String methodName, String key, String value) { if (methodName.equals("")) { QueryBuilder termQueryBuilder = QueryBuilders.matchPhraseQuery(key, value); diff --git a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java index 35b2bb0a..c49d1ec3 100644 --- a/src/test/java/org/nlpcn/es4sql/NestedSortTest.java +++ b/src/test/java/org/nlpcn/es4sql/NestedSortTest.java @@ -16,6 +16,7 @@ * Created by fangbb on 2016-12-12. */ public class NestedSortTest { + @Test public void nestedSort4MatchQueryTest(){ String sql = "SELECT * FROM test order by nested(a,sum(a.b),a.c=matchQuery(\"百度和谷歌\"))"; @@ -33,7 +34,7 @@ public void nestedSort4MatchQueryTest(){ " \"should\" : {\n" + " \"match\" : {\n" + " \"a.c\" : {\n" + - " \"query\" : \"'百度和谷歌'\",\n" + + " \"query\" : \"百度和谷歌\",\n" + " \"type\" : \"boolean\"\n" + " }\n" + " }\n" + @@ -156,7 +157,7 @@ public void nestedSortRealTest(){ " \"should\" : [ {\n" + " \"match\" : {\n" + " \"info.name\" : {\n" + - " \"query\" : \"'java'\",\n" + + " \"query\" : \"java\",\n" + " \"type\" : \"boolean\"\n" + " }\n" + " }\n" + From ad35623259772d4c3401c9121a3db1ce41a37e6c Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 16 Dec 2016 12:35:28 +0800 Subject: [PATCH 22/23] modify es-sql init config --- .../preAnalyzer/AnsjElasticConfigurator.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java index 2bb6b821..dee56f9b 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java @@ -17,18 +17,21 @@ public class AnsjElasticConfigurator { public static void init(Settings settings) { try { - ES_IP = settings.get("network.host"); + ES_IP = settings.get("ansj.host"); ES_PORT = settings.get("http.port"); - logger.info("es 本地IP:" + ES_IP); - logger.info("es 端口:"+ ES_PORT); - if (ES_IP==null || ES_IP.equals("")){ - logger.error("本地IP获取失败"); + if (ES_IP == null || ES_IP.equals("")) { + logger.error("ansj.host获取失败"); + } else { + logger.info("ansj.host:" + ES_IP); } - if (ES_PORT==null || ES_PORT.equals("")){ - logger.error("ES服务获取失败"); + if (ES_PORT == null || ES_PORT.equals("")) { + ES_PORT = "9200"; + logger.error("ES服务获取失败,使用默认端口:9200"); + } else { + logger.info("http.port:"+ ES_PORT); } } catch (Exception e) { - logger.error("本地IP和ES端口获取失败"); + logger.error(e.getMessage()); } } } From b667d6b6ccbb9ad4c9c5c6ddf1abae629bbcf1ee Mon Sep 17 00:00:00 2001 From: gitxxx Date: Fri, 16 Dec 2016 13:18:05 +0800 Subject: [PATCH 23/23] modify init config --- .../plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java index dee56f9b..a8c5bcff 100644 --- a/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java +++ b/src/main/java/org/elasticsearch/plugin/nlpcn/preAnalyzer/AnsjElasticConfigurator.java @@ -17,16 +17,16 @@ public class AnsjElasticConfigurator { public static void init(Settings settings) { try { - ES_IP = settings.get("ansj.host"); + ES_IP = settings.get("network.host"); ES_PORT = settings.get("http.port"); if (ES_IP == null || ES_IP.equals("")) { - logger.error("ansj.host获取失败"); + logger.error("network.host获取失败"); } else { - logger.info("ansj.host:" + ES_IP); + logger.info("network.host:" + ES_IP); } if (ES_PORT == null || ES_PORT.equals("")) { ES_PORT = "9200"; - logger.error("ES服务获取失败,使用默认端口:9200"); + logger.error("http.port获取失败,使用默认端口:9200"); } else { logger.info("http.port:"+ ES_PORT); }