diff --git a/Dockerfile b/Dockerfile index 016a5e57648b5d1d05e089f76b374a5cffeb67a8..84f23562f9143375eaefbe9b03918006bc270437 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,3 +4,4 @@ RUN elasticsearch-plugin remove --purge x-pack RUN elasticsearch-plugin install analysis-icu ADD hunspell/ /usr/share/elasticsearch/config/hunspell/ +ADD analysis/ /usr/share/elasticsearch/config/analysis/ diff --git a/README.md b/README.md index ac6a62e9622bfcfa71fef13dfdfd72361ba9c88d..063f78617aa0259d12da749e2760993429b29a4b 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,11 @@ Create Index with these settings: 'type': 'hunspell', 'locale': 'cs_CZ', 'dedup': True, - } + }, + 'czech_synonym': { + 'type': 'synonym', + 'synonyms_path': 'analysis/cs_CZ/synonym.txt', + }, }, 'analyzer': { 'czech': { @@ -38,6 +42,7 @@ Create Index with these settings: 'filter': [ 'icu_folding', 'lowercase', + 'czech_synonym', 'czech_stop', 'czech_stemmer', 'cs_CZ', @@ -51,6 +56,9 @@ Create Index with these settings: You can use `'czech'` analyzer on text fields now. +A custom dictionary of synonyms is included. You can adjust it to your +needs or remove it from the analyzer settings. + ## Build If you don't want to use pre-built container from diff --git a/analysis/cs_CZ/synonym.txt b/analysis/cs_CZ/synonym.txt new file mode 100644 index 0000000000000000000000000000000000000000..a76a5e2b52e74907932aa2c0ed7554c01708fbb1 --- /dev/null +++ b/analysis/cs_CZ/synonym.txt @@ -0,0 +1,8 @@ +# Czech Synonyms in Solr format +# ============================= +# +# Write a comma-separated list of words without diacritics, in lowercase. +# One set of synonyms per line. + +kava, kafe +tramvaj, salina