mirror of
https://github.com/CodeforLeipzig/stadtratmonitor.git
synced 2024-12-22 15:43:14 +01:00
Update elasticsearch
- Build our own Elasticsearch image based on 6.8.6, which includes the configuration files for the German decompounder from https://github.com/uschindler/german-decompounder
- Move elastichq to a separate service and use the official image instead of including it as a plugin in the Elasticsearch image
- Adjust the compounder filter config in papers to use the included hyphenation_decompounder. See https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-compound-word-tokenfilter.html
- Remove the obsolete "string" type in the mapping of papers, and replace it with "text" or "keyword" accordingly. See https://www.elastic.co/blog/strings-are-dead-long-live-strings
This commit is contained in:
parent
5dad333655
commit
dc3a97d3b8
5 changed files with 80 additions and 62 deletions
4
Gemfile
4
Gemfile
|
@ -31,8 +31,8 @@ gem 'omniauth'
|
|||
gem 'omniauth-browserid'
|
||||
|
||||
gem 'elasticsearch', '~> 6'
|
||||
gem 'elasticsearch-model'
|
||||
gem 'elasticsearch-rails'
|
||||
gem 'elasticsearch-model', '~> 6'
|
||||
gem 'elasticsearch-rails', '~> 6'
|
||||
gem 'elasticsearch-dsl'
|
||||
|
||||
gem 'leaflet-rails'
|
||||
|
|
|
@ -79,11 +79,11 @@ GEM
|
|||
elasticsearch-api (6.0.2)
|
||||
multi_json
|
||||
elasticsearch-dsl (0.1.5)
|
||||
elasticsearch-model (5.0.0)
|
||||
elasticsearch-model (6.1.0)
|
||||
activesupport (> 3)
|
||||
elasticsearch (> 1)
|
||||
hashie
|
||||
elasticsearch-rails (5.0.2)
|
||||
elasticsearch-rails (6.1.0)
|
||||
elasticsearch-transport (6.0.2)
|
||||
faraday
|
||||
multi_json
|
||||
|
@ -287,8 +287,8 @@ DEPENDENCIES
|
|||
database_cleaner
|
||||
elasticsearch (~> 6)
|
||||
elasticsearch-dsl
|
||||
elasticsearch-model
|
||||
elasticsearch-rails
|
||||
elasticsearch-model (~> 6)
|
||||
elasticsearch-rails (~> 6)
|
||||
factory_bot_rails
|
||||
faker
|
||||
foundation-rails (~> 5.5)
|
||||
|
|
|
@ -22,42 +22,46 @@ class Paper < ActiveRecord::Base
|
|||
index_name ['srm', Rails.env, self.base_class.to_s.pluralize.underscore].join('_')
|
||||
|
||||
settings index: {
|
||||
number_of_shards: 1,
|
||||
analysis: {
|
||||
filter: {
|
||||
german_stop: {
|
||||
type: "stop",
|
||||
stopwords: "_german_"
|
||||
},
|
||||
german_stemmer: {
|
||||
type: "stemmer",
|
||||
language: "light_german"
|
||||
},
|
||||
decomp: {
|
||||
type: "decompound"
|
||||
}
|
||||
number_of_shards: 1,
|
||||
analysis: {
|
||||
filter: {
|
||||
german_stop: {
|
||||
type: "stop",
|
||||
stopwords: "_german_"
|
||||
},
|
||||
analyzer: {
|
||||
german: {
|
||||
tokenizer: "standard",
|
||||
filter: [
|
||||
"lowercase",
|
||||
"german_stop",
|
||||
"german_normalization",
|
||||
"german_stemmer",
|
||||
"decomp"
|
||||
]
|
||||
}
|
||||
german_stemmer: {
|
||||
type: "stemmer",
|
||||
language: "light_german"
|
||||
},
|
||||
german_decompounder: {
|
||||
type: "hyphenation_decompounder",
|
||||
word_list_path: "analysis/dictionary-de.txt",
|
||||
hyphenation_patterns_path: "analysis/de_DR.xml",
|
||||
only_longest_match: true,
|
||||
min_subword_size: 4
|
||||
},
|
||||
},
|
||||
analyzer: {
|
||||
german: {
|
||||
tokenizer: "standard",
|
||||
filter: [
|
||||
"lowercase",
|
||||
"german_stop",
|
||||
"german_decompounder",
|
||||
"german_normalization",
|
||||
"german_stemmer"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
} do mappings dynamic: false do
|
||||
indexes :name, type: :string, analyzer: "german"
|
||||
indexes :content, type: :string, analyzer: "german"
|
||||
indexes :resolution, type: :string, analyzer: "german"
|
||||
indexes :reference, type: :string, index: :not_analyzed
|
||||
indexes :paper_type, type: :string, index: :not_analyzed
|
||||
indexes :published_at, type: :date, index: :not_analyzed
|
||||
indexes :originator, type: :string, index: :not_analyzed
|
||||
indexes :name, type: :text, analyzer: "german"
|
||||
indexes :content, type: :text, analyzer: "german"
|
||||
indexes :resolution, type: :text, analyzer: "german"
|
||||
indexes :reference, type: :keyword, index: true
|
||||
indexes :paper_type, type: :keyword, index: true
|
||||
indexes :published_at, type: :date, index: true
|
||||
indexes :originator, type: :keyword, index: true
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,22 +1,28 @@
|
|||
web:
|
||||
build: .
|
||||
volumes:
|
||||
- .:/app
|
||||
ports:
|
||||
- "3000:3000"
|
||||
links:
|
||||
- elasticsearch
|
||||
environment:
|
||||
ELASTICSEARCH_URL: 'http://elasticsearch:9200'
|
||||
elasticsearch:
|
||||
image: elasticsearch:5.4.3
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "./bin/elasticsearch-plugin install http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-analysis-decompound/5.4.3.0/elasticsearch-analysis-decompound-5.4.3.0-plugin.zip;
|
||||
./bin/elasticsearch-plugin install https://github.com/royrusso/elasticsearch-HQ/zipball/master;
|
||||
/docker-entrypoint.sh elasticsearch"
|
||||
ports:
|
||||
- "9200:9200"
|
||||
volumes:
|
||||
- .:/apps
|
||||
version: "3.7"
|
||||
volumes:
|
||||
elasticsearch:
|
||||
services:
|
||||
web:
|
||||
build: .
|
||||
volumes:
|
||||
- .:/app
|
||||
ports:
|
||||
- "3000:3000"
|
||||
links:
|
||||
- elasticsearch
|
||||
environment:
|
||||
ELASTICSEARCH_URL: 'http://elasticsearch:9200'
|
||||
elasticsearch:
|
||||
build: ./docker/elasticsearch
|
||||
environment:
|
||||
- discovery.type=single-node
|
||||
ports:
|
||||
- "9200:9200"
|
||||
volumes:
|
||||
- elasticsearch:/usr/share/elasticsearch/data
|
||||
elastichq:
|
||||
image: elastichq/elasticsearch-hq
|
||||
ports:
|
||||
- "5000:5000"
|
||||
links:
|
||||
- elasticsearch
|
||||
|
|
8
docker/elasticsearch/Dockerfile
Normal file
8
docker/elasticsearch/Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
|||
FROM elasticsearch:6.8.6
|
||||
|
||||
RUN \
|
||||
mkdir -p /usr/share/elasticsearch/config/analysis && \
|
||||
pushd /usr/share/elasticsearch/config/analysis && \
|
||||
curl -sSLO https://github.com/uschindler/german-decompounder/raw/master/de_DR.xml && \
|
||||
curl -sSLO https://github.com/uschindler/german-decompounder/raw/master/dictionary-de.txt && \
|
||||
popd
|
Loading…
Reference in a new issue