From c2c5ecf6af1ce169468ad0adf68a0b827df32efb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Bedna=C5=99=C3=ADk?= <jan.bednarik@gmail.com>
Date: Wed, 29 Nov 2017 23:00:48 +0100
Subject: [PATCH] Custom thesaurus of Czech synonyms.

---
 Dockerfile                 |  1 +
 README.md                  | 10 +++++++++-
 analysis/cs_CZ/synonym.txt |  8 ++++++++
 3 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 analysis/cs_CZ/synonym.txt

diff --git a/Dockerfile b/Dockerfile
index 016a5e5..84f2356 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,3 +4,4 @@ RUN elasticsearch-plugin remove --purge x-pack
 RUN elasticsearch-plugin install analysis-icu
 
 ADD hunspell/ /usr/share/elasticsearch/config/hunspell/
+ADD analysis/ /usr/share/elasticsearch/config/analysis/
diff --git a/README.md b/README.md
index ac6a62e..063f786 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,11 @@ Create Index with these settings:
                     'type': 'hunspell',
                     'locale': 'cs_CZ',
                     'dedup': True,
-                }
+                },
+                'czech_synonym': {
+                    'type': 'synonym',
+                    'synonyms_path': 'analysis/cs_CZ/synonym.txt',
+                },
             },
             'analyzer': {
                 'czech': {
@@ -38,6 +42,7 @@ Create Index with these settings:
                     'filter': [
                         'icu_folding',
                         'lowercase',
+                        'czech_synonym',
                         'czech_stop',
                         'czech_stemmer',
                         'cs_CZ',
@@ -51,6 +56,9 @@ Create Index with these settings:
 
 You can use `'czech'` analyzer on text fields now.
 
+A custom dictionary of synonyms is included. You can adjust it to your
+needs or remove it from the analyzer settings.
+
 ## Build
 
 If you don't want to use pre-built container from
diff --git a/analysis/cs_CZ/synonym.txt b/analysis/cs_CZ/synonym.txt
new file mode 100644
index 0000000..a76a5e2
--- /dev/null
+++ b/analysis/cs_CZ/synonym.txt
@@ -0,0 +1,8 @@
+# Czech Synonyms in Solr format
+# =============================
+#
+# Write a comma-separated list of lowercase words without diacritics.
+# One set of synonyms per line.
+
+kava, kafe
+tramvaj, salina
-- 
GitLab