Update elasticsearch

- Build our own Elasticsearch image based on 6.8.6, which includes the
configuration files for the German decompounder from https://github.com/uschindler/german-decompounder

- Move elastichq to a separate service and use the official image instead of including it as a plugin in the Elasticsearch image

- Adjust compounder filter config in papers to use the included hyphenation_decompounder.
See https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-compound-word-tokenfilter.html

- Remove the obsolete "string" type from the mapping of papers and replace it
with "text" or "keyword" as appropriate. See https://www.elastic.co/blog/strings-are-dead-long-live-strings
This commit is contained in:
Lars Henrik Mai 2020-01-16 11:20:03 +01:00
parent 5dad333655
commit dc3a97d3b8
5 changed files with 80 additions and 62 deletions

View file

@ -31,8 +31,8 @@ gem 'omniauth'
gem 'omniauth-browserid' gem 'omniauth-browserid'
gem 'elasticsearch', '~> 6' gem 'elasticsearch', '~> 6'
gem 'elasticsearch-model' gem 'elasticsearch-model', '~> 6'
gem 'elasticsearch-rails' gem 'elasticsearch-rails', '~> 6'
gem 'elasticsearch-dsl' gem 'elasticsearch-dsl'
gem 'leaflet-rails' gem 'leaflet-rails'

View file

@ -79,11 +79,11 @@ GEM
elasticsearch-api (6.0.2) elasticsearch-api (6.0.2)
multi_json multi_json
elasticsearch-dsl (0.1.5) elasticsearch-dsl (0.1.5)
elasticsearch-model (5.0.0) elasticsearch-model (6.1.0)
activesupport (> 3) activesupport (> 3)
elasticsearch (> 1) elasticsearch (> 1)
hashie hashie
elasticsearch-rails (5.0.2) elasticsearch-rails (6.1.0)
elasticsearch-transport (6.0.2) elasticsearch-transport (6.0.2)
faraday faraday
multi_json multi_json
@ -287,8 +287,8 @@ DEPENDENCIES
database_cleaner database_cleaner
elasticsearch (~> 6) elasticsearch (~> 6)
elasticsearch-dsl elasticsearch-dsl
elasticsearch-model elasticsearch-model (~> 6)
elasticsearch-rails elasticsearch-rails (~> 6)
factory_bot_rails factory_bot_rails
faker faker
foundation-rails (~> 5.5) foundation-rails (~> 5.5)

View file

@ -33,9 +33,13 @@ class Paper < ActiveRecord::Base
type: "stemmer", type: "stemmer",
language: "light_german" language: "light_german"
}, },
decomp: { german_decompounder: {
type: "decompound" type: "hyphenation_decompounder",
} word_list_path: "analysis/dictionary-de.txt",
hyphenation_patterns_path: "analysis/de_DR.xml",
only_longest_match: true,
min_subword_size: 4
},
}, },
analyzer: { analyzer: {
german: { german: {
@ -43,21 +47,21 @@ class Paper < ActiveRecord::Base
filter: [ filter: [
"lowercase", "lowercase",
"german_stop", "german_stop",
"german_decompounder",
"german_normalization", "german_normalization",
"german_stemmer", "german_stemmer"
"decomp"
] ]
} }
} }
} }
} do mappings dynamic: false do } do mappings dynamic: false do
indexes :name, type: :string, analyzer: "german" indexes :name, type: :text, analyzer: "german"
indexes :content, type: :string, analyzer: "german" indexes :content, type: :text, analyzer: "german"
indexes :resolution, type: :string, analyzer: "german" indexes :resolution, type: :text, analyzer: "german"
indexes :reference, type: :string, index: :not_analyzed indexes :reference, type: :keyword, index: true
indexes :paper_type, type: :string, index: :not_analyzed indexes :paper_type, type: :keyword, index: true
indexes :published_at, type: :date, index: :not_analyzed indexes :published_at, type: :date, index: true
indexes :originator, type: :string, index: :not_analyzed indexes :originator, type: :keyword, index: true
end end
end end

View file

@ -1,4 +1,8 @@
web: version: "3.7"
volumes:
elasticsearch:
services:
web:
build: . build: .
volumes: volumes:
- .:/app - .:/app
@ -8,15 +12,17 @@ web:
- elasticsearch - elasticsearch
environment: environment:
ELASTICSEARCH_URL: 'http://elasticsearch:9200' ELASTICSEARCH_URL: 'http://elasticsearch:9200'
elasticsearch: elasticsearch:
image: elasticsearch:5.4.3 build: ./docker/elasticsearch
command: environment:
- sh - discovery.type=single-node
- -c
- "./bin/elasticsearch-plugin install http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-analysis-decompound/5.4.3.0/elasticsearch-analysis-decompound-5.4.3.0-plugin.zip;
./bin/elasticsearch-plugin install https://github.com/royrusso/elasticsearch-HQ/zipball/master;
/docker-entrypoint.sh elasticsearch"
ports: ports:
- "9200:9200" - "9200:9200"
volumes: volumes:
- .:/apps - elasticsearch:/usr/share/elasticsearch/data
elastichq:
image: elastichq/elasticsearch-hq
ports:
- "5000:5000"
links:
- elasticsearch

View file

@ -0,0 +1,8 @@
# Elasticsearch 6.8.6 image that ships the hyphenation patterns (de_DR.xml)
# and word list (dictionary-de.txt) from
# https://github.com/uschindler/german-decompounder under config/analysis.
FROM elasticsearch:6.8.6
# curl -f makes an HTTP error (404/5xx) fail the build instead of silently
# saving the error page as an analysis file. Plain `cd` is used instead of
# pushd/popd: those are bash builtins, and the working directory does not
# outlive this RUN step anyway.
RUN \
    mkdir -p /usr/share/elasticsearch/config/analysis && \
    cd /usr/share/elasticsearch/config/analysis && \
    curl -fsSLO https://github.com/uschindler/german-decompounder/raw/master/de_DR.xml && \
    curl -fsSLO https://github.com/uschindler/german-decompounder/raw/master/dictionary-de.txt