Skip to content

Commit 8bad607

Browse files
lukas-vlcek authored and kimchy committed
Added tests for synonym parsers.
1 parent 98504dd commit 8bad607

File tree

6 files changed

+179
-12
lines changed

6 files changed

+179
-12
lines changed

modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/Analysis.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -243,13 +243,13 @@ public static List<String> loadWordList(Reader reader, String comment) throws IO
243243
}
244244

245245
/**
246-
* @return null If no settings set for "settingsPrefix + _path" then return null.
246+
* @return <code>null</code> if no value is configured under the "settingPrefix" key.
247247
*
248248
* @throws ElasticSearchIllegalArgumentException
249249
* If the Reader can not be instantiated.
250250
*/
251-
public static Reader getFileReader(Environment env, Settings settings, String settingPrefix) {
252-
String filePath = settings.get(settingPrefix + "_path", null);
251+
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix) {
252+
String filePath = settings.get(settingPrefix, null);
253253

254254
if (filePath == null) {
255255
return null;

modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.apache.lucene.analysis.synonym.SynonymFilter;
3030
import org.apache.lucene.analysis.synonym.SynonymMap;
3131
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
32-
import org.apache.lucene.util.CharsRef;
3332
import org.elasticsearch.ElasticSearchIllegalArgumentException;
3433
import org.elasticsearch.common.inject.Inject;
3534
import org.elasticsearch.common.inject.assistedinject.Assisted;
@@ -40,12 +39,8 @@
4039
import org.elasticsearch.index.settings.IndexSettings;
4140
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
4241

43-
import java.io.BufferedReader;
44-
import java.io.IOException;
45-
import java.io.LineNumberReader;
4642
import java.io.Reader;
47-
import java.text.ParseException;
48-
import java.util.ArrayList;
43+
import java.io.StringReader;
4944
import java.util.List;
5045
import java.util.Map;
5146

@@ -59,10 +54,20 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
5954
@Assisted String name, @Assisted Settings settings) {
6055
super(index, indexSettings, name, settings);
6156

62-
Reader rulesReader = Analysis.getFileReader(env, settings, "synonyms");
63-
if (rulesReader == null) {
57+
Reader rulesReader = null;
58+
if (settings.getAsArray("synonyms", null) != null) {
59+
List<String> rules = Analysis.getWordList(env, settings, "synonyms");
60+
StringBuilder sb = new StringBuilder();
61+
for (String line : rules) {
62+
sb.append(line).append(System.getProperty("line.separator"));
63+
}
64+
rulesReader = new StringReader(sb.toString());
65+
} else if (settings.get("synonyms_path") != null) {
66+
rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
67+
} else {
6468
throw new ElasticSearchIllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured");
6569
}
70+
6671
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
6772
boolean expand = settings.getAsBoolean("expand", true);
6873

@@ -89,13 +94,14 @@ protected TokenStreamComponents createComponents(String fieldName, Reader reader
8994
try {
9095
SynonymMap.Builder parser = null;
9196

92-
if (settings.get("format","wordnet").equalsIgnoreCase("wordnet")) {
97+
if ("wordnet".equalsIgnoreCase(settings.get("format"))) {
9398
parser = new WordnetSynonymParser(true, expand, analyzer);
9499
((WordnetSynonymParser)parser).add(rulesReader);
95100
} else {
96101
parser = new SolrSynonymParser(true, expand, analyzer);
97102
((SolrSynonymParser)parser).add(rulesReader);
98103
}
104+
99105
synonymMap = parser.build();
100106
} catch (Exception e) {
101107
throw new ElasticSearchIllegalArgumentException("failed to build synonyms", e);
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.index.analysis.synonyms;
21+
22+
import org.apache.lucene.analysis.Analyzer;
23+
import org.apache.lucene.analysis.TokenStream;
24+
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
25+
import org.elasticsearch.common.inject.Injector;
26+
import org.elasticsearch.common.inject.ModulesBuilder;
27+
import org.elasticsearch.common.logging.ESLogger;
28+
import org.elasticsearch.common.logging.Loggers;
29+
import org.elasticsearch.common.lucene.all.AllEntries;
30+
import org.elasticsearch.common.lucene.all.AllTokenStream;
31+
import org.elasticsearch.common.settings.Settings;
32+
import org.elasticsearch.common.settings.SettingsModule;
33+
import org.elasticsearch.env.Environment;
34+
import org.elasticsearch.env.EnvironmentModule;
35+
import org.elasticsearch.index.Index;
36+
import org.elasticsearch.index.IndexNameModule;
37+
import org.elasticsearch.index.analysis.AnalysisModule;
38+
import org.elasticsearch.index.analysis.AnalysisService;
39+
import org.elasticsearch.index.settings.IndexSettingsModule;
40+
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
41+
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
42+
import org.hamcrest.MatcherAssert;
43+
import org.testng.annotations.Test;
44+
45+
import java.io.IOException;
46+
47+
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
48+
import static org.hamcrest.Matchers.*;
49+
50+
/**
51+
* @author Lukas Vlcek
52+
*/
53+
public class SynonymsAnalysisTest {
54+
55+
protected final ESLogger logger = Loggers.getLogger(getClass());
56+
private AnalysisService analysisService;
57+
58+
@Test public void testSynonymsAnalysis() throws IOException {
59+
60+
Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/synonyms/synonyms.json").build();
61+
62+
Index index = new Index("test");
63+
64+
Injector parentInjector = new ModulesBuilder().add(
65+
new SettingsModule(settings),
66+
new EnvironmentModule(new Environment(settings)),
67+
new IndicesAnalysisModule())
68+
.createInjector();
69+
Injector injector = new ModulesBuilder().add(
70+
new IndexSettingsModule(index, settings),
71+
new IndexNameModule(index),
72+
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)))
73+
.createChildInjector(parentInjector);
74+
75+
analysisService = injector.getInstance(AnalysisService.class);
76+
77+
match("synonymAnalyzer", "kimchy is the dude abides", "shay is the elasticsearch man!");
78+
match("synonymAnalyzer_file", "kimchy is the dude abides", "shay is the elasticsearch man!");
79+
match("synonymAnalyzerWordnet", "abstain", "abstain refrain desist");
80+
match("synonymAnalyzerWordnet_file", "abstain", "abstain refrain desist");
81+
82+
}
83+
84+
private void match(String analyzerName, String source, String target) throws IOException {
85+
86+
Analyzer analyzer = analysisService.analyzer(analyzerName).analyzer();
87+
88+
AllEntries allEntries = new AllEntries();
89+
allEntries.addText("field", source, 1.0f);
90+
allEntries.reset();
91+
92+
TokenStream stream = AllTokenStream.allTokenStream("_all", allEntries, analyzer);
93+
TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
94+
95+
StringBuilder sb = new StringBuilder();
96+
while (stream.incrementToken()) {
97+
sb.append(termAtt.term()).append(" ");
98+
}
99+
100+
MatcherAssert.assertThat(target, equalTo(sb.toString().trim()));
101+
}
102+
103+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
{
2+
"index" : {
3+
"analysis" : {
4+
"analyzer" : {
5+
"synonymAnalyzer" : {
6+
"tokenizer" : "standard",
7+
"filter" : [ "synonym" ]
8+
},
9+
"synonymAnalyzer_file" : {
10+
"tokenizer" : "standard",
11+
"filter" : [ "synonym_file" ]
12+
},
13+
"synonymAnalyzerWordnet" : {
14+
"tokenizer" : "standard",
15+
"filter" : [ "synonymWordnet" ]
16+
},
17+
"synonymAnalyzerWordnet_file" : {
18+
"tokenizer" : "standard",
19+
"filter" : [ "synonymWordnet_file" ]
20+
}
21+
},
22+
"filter" : {
23+
"synonym" : {
24+
"type" : "synonym",
25+
"synonyms" : [
26+
"kimchy => shay",
27+
"dude => elasticsearch",
28+
"abides => man!"
29+
]
30+
},
31+
"synonym_file" : {
32+
"type" : "synonym",
33+
"synonyms_path" : "org/elasticsearch/index/analysis/synonyms/synonyms.txt"
34+
},
35+
"synonymWordnet" : {
36+
"type" : "synonym",
37+
"format" : "wordnet",
38+
"synonyms" : [
39+
"s(100000001,1,'abstain',v,1,0).",
40+
"s(100000001,2,'refrain',v,1,0).",
41+
"s(100000001,3,'desist',v,1,0)."
42+
]
43+
},
44+
"synonymWordnet_file" : {
45+
"type" : "synonym",
46+
"format" : "wordnet",
47+
"synonyms_path" : "org/elasticsearch/index/analysis/synonyms/synonyms_wordnet.txt"
48+
}
49+
}
50+
}
51+
}
52+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
kimchy => shay
2+
dude => elasticsearch
3+
abides => man!
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
s(100000001,1,'abstain',v,1,0).
2+
s(100000001,2,'refrain',v,1,0).
3+
s(100000001,3,'desist',v,1,0).

0 commit comments

Comments
 (0)