Skip to content

Commit d0fc829

Browse files
committed
Analysis: Add "arabic", "brazilian", and "czech" to the stemmer token filter's language options; closes elastic#1519.
1 parent 2871ecc commit d0fc829

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

modules/elasticsearch/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,12 @@
2121

2222
import org.apache.lucene.analysis.PorterStemFilter;
2323
import org.apache.lucene.analysis.TokenStream;
24+
import org.apache.lucene.analysis.ar.ArabicStemFilter;
25+
import org.apache.lucene.analysis.br.BrazilianStemFilter;
26+
import org.apache.lucene.analysis.cz.CzechStemFilter;
2427
import org.apache.lucene.analysis.de.GermanLightStemFilter;
2528
import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
29+
import org.apache.lucene.analysis.el.GreekStemFilter;
2630
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
2731
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
2832
import org.apache.lucene.analysis.en.KStemFilter;
@@ -38,7 +42,6 @@
3842
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
3943
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
4044
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
41-
import org.apache.lucene.analysis.el.GreekStemFilter;
4245
import org.apache.lucene.analysis.snowball.SnowballFilter;
4346
import org.elasticsearch.common.Strings;
4447
import org.elasticsearch.common.inject.Inject;
@@ -60,12 +63,18 @@ public class StemmerTokenFilterFactory extends AbstractTokenFilterFactory {
6063
}
6164

6265
@Override public TokenStream create(TokenStream tokenStream) {
63-
if ("armenian".equalsIgnoreCase(language)) {
66+
if ("arabic".equalsIgnoreCase(language)) {
67+
return new ArabicStemFilter(tokenStream);
68+
} else if ("armenian".equalsIgnoreCase(language)) {
6469
return new SnowballFilter(tokenStream, new ArmenianStemmer());
6570
} else if ("basque".equalsIgnoreCase(language)) {
6671
return new SnowballFilter(tokenStream, new BasqueStemmer());
72+
} else if ("brazilian".equalsIgnoreCase(language)) {
73+
return new BrazilianStemFilter(tokenStream);
6774
} else if ("catalan".equalsIgnoreCase(language)) {
6875
return new SnowballFilter(tokenStream, new CatalanStemmer());
76+
} else if ("czech".equalsIgnoreCase(language)) {
77+
return new CzechStemFilter(tokenStream);
6978
} else if ("danish".equalsIgnoreCase(language)) {
7079
return new SnowballFilter(tokenStream, new DanishStemmer());
7180
} else if ("dutch".equalsIgnoreCase(language)) {

0 commit comments

Comments
 (0)