mirror of
https://github.com/CodeforLeipzig/stadtratmonitor.git
synced 2024-12-22 23:53:15 +01:00
Update elasticsearch
- Build our own elasticsearch image based on 6.8.6 which includes configuration files for the German decompounder from https://github.com/uschindler/german-decompounder
- Move elastichq to a separate service and use the official image instead of including it as a plugin in the elasticsearch image
- Adjust compounder filter config in papers to use the included hyphenation_decompounder. See https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-compound-word-tokenfilter.html
- Remove obsolete "string" type in the mapping of papers, and replace it with "text" or "keyword" accordingly. See https://www.elastic.co/blog/strings-are-dead-long-live-strings
This commit is contained in:
parent
5dad333655
commit
dc3a97d3b8
5 changed files with 80 additions and 62 deletions
4
Gemfile
4
Gemfile
|
@ -31,8 +31,8 @@ gem 'omniauth'
|
||||||
gem 'omniauth-browserid'
|
gem 'omniauth-browserid'
|
||||||
|
|
||||||
gem 'elasticsearch', '~> 6'
|
gem 'elasticsearch', '~> 6'
|
||||||
gem 'elasticsearch-model'
|
gem 'elasticsearch-model', '~> 6'
|
||||||
gem 'elasticsearch-rails'
|
gem 'elasticsearch-rails', '~> 6'
|
||||||
gem 'elasticsearch-dsl'
|
gem 'elasticsearch-dsl'
|
||||||
|
|
||||||
gem 'leaflet-rails'
|
gem 'leaflet-rails'
|
||||||
|
|
|
@ -79,11 +79,11 @@ GEM
|
||||||
elasticsearch-api (6.0.2)
|
elasticsearch-api (6.0.2)
|
||||||
multi_json
|
multi_json
|
||||||
elasticsearch-dsl (0.1.5)
|
elasticsearch-dsl (0.1.5)
|
||||||
elasticsearch-model (5.0.0)
|
elasticsearch-model (6.1.0)
|
||||||
activesupport (> 3)
|
activesupport (> 3)
|
||||||
elasticsearch (> 1)
|
elasticsearch (> 1)
|
||||||
hashie
|
hashie
|
||||||
elasticsearch-rails (5.0.2)
|
elasticsearch-rails (6.1.0)
|
||||||
elasticsearch-transport (6.0.2)
|
elasticsearch-transport (6.0.2)
|
||||||
faraday
|
faraday
|
||||||
multi_json
|
multi_json
|
||||||
|
@ -287,8 +287,8 @@ DEPENDENCIES
|
||||||
database_cleaner
|
database_cleaner
|
||||||
elasticsearch (~> 6)
|
elasticsearch (~> 6)
|
||||||
elasticsearch-dsl
|
elasticsearch-dsl
|
||||||
elasticsearch-model
|
elasticsearch-model (~> 6)
|
||||||
elasticsearch-rails
|
elasticsearch-rails (~> 6)
|
||||||
factory_bot_rails
|
factory_bot_rails
|
||||||
faker
|
faker
|
||||||
foundation-rails (~> 5.5)
|
foundation-rails (~> 5.5)
|
||||||
|
|
|
@ -33,9 +33,13 @@ class Paper < ActiveRecord::Base
|
||||||
type: "stemmer",
|
type: "stemmer",
|
||||||
language: "light_german"
|
language: "light_german"
|
||||||
},
|
},
|
||||||
decomp: {
|
german_decompounder: {
|
||||||
type: "decompound"
|
type: "hyphenation_decompounder",
|
||||||
}
|
word_list_path: "analysis/dictionary-de.txt",
|
||||||
|
hyphenation_patterns_path: "analysis/de_DR.xml",
|
||||||
|
only_longest_match: true,
|
||||||
|
min_subword_size: 4
|
||||||
|
},
|
||||||
},
|
},
|
||||||
analyzer: {
|
analyzer: {
|
||||||
german: {
|
german: {
|
||||||
|
@ -43,21 +47,21 @@ class Paper < ActiveRecord::Base
|
||||||
filter: [
|
filter: [
|
||||||
"lowercase",
|
"lowercase",
|
||||||
"german_stop",
|
"german_stop",
|
||||||
|
"german_decompounder",
|
||||||
"german_normalization",
|
"german_normalization",
|
||||||
"german_stemmer",
|
"german_stemmer"
|
||||||
"decomp"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} do mappings dynamic: false do
|
} do mappings dynamic: false do
|
||||||
indexes :name, type: :string, analyzer: "german"
|
indexes :name, type: :text, analyzer: "german"
|
||||||
indexes :content, type: :string, analyzer: "german"
|
indexes :content, type: :text, analyzer: "german"
|
||||||
indexes :resolution, type: :string, analyzer: "german"
|
indexes :resolution, type: :text, analyzer: "german"
|
||||||
indexes :reference, type: :string, index: :not_analyzed
|
indexes :reference, type: :keyword, index: true
|
||||||
indexes :paper_type, type: :string, index: :not_analyzed
|
indexes :paper_type, type: :keyword, index: true
|
||||||
indexes :published_at, type: :date, index: :not_analyzed
|
indexes :published_at, type: :date, index: true
|
||||||
indexes :originator, type: :string, index: :not_analyzed
|
indexes :originator, type: :keyword, index: true
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
version: "3.7"
|
||||||
|
volumes:
|
||||||
|
elasticsearch:
|
||||||
|
services:
|
||||||
web:
|
web:
|
||||||
build: .
|
build: .
|
||||||
volumes:
|
volumes:
|
||||||
|
@ -9,14 +13,16 @@ web:
|
||||||
environment:
|
environment:
|
||||||
ELASTICSEARCH_URL: 'http://elasticsearch:9200'
|
ELASTICSEARCH_URL: 'http://elasticsearch:9200'
|
||||||
elasticsearch:
|
elasticsearch:
|
||||||
image: elasticsearch:5.4.3
|
build: ./docker/elasticsearch
|
||||||
command:
|
environment:
|
||||||
- sh
|
- discovery.type=single-node
|
||||||
- -c
|
|
||||||
- "./bin/elasticsearch-plugin install http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-analysis-decompound/5.4.3.0/elasticsearch-analysis-decompound-5.4.3.0-plugin.zip;
|
|
||||||
./bin/elasticsearch-plugin install https://github.com/royrusso/elasticsearch-HQ/zipball/master;
|
|
||||||
/docker-entrypoint.sh elasticsearch"
|
|
||||||
ports:
|
ports:
|
||||||
- "9200:9200"
|
- "9200:9200"
|
||||||
volumes:
|
volumes:
|
||||||
- .:/apps
|
- elasticsearch:/usr/share/elasticsearch/data
|
||||||
|
elastichq:
|
||||||
|
image: elastichq/elasticsearch-hq
|
||||||
|
ports:
|
||||||
|
- "5000:5000"
|
||||||
|
links:
|
||||||
|
- elasticsearch
|
||||||
|
|
8
docker/elasticsearch/Dockerfile
Normal file
8
docker/elasticsearch/Dockerfile
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
FROM elasticsearch:6.8.6
|
||||||
|
|
||||||
|
RUN \
|
||||||
|
mkdir -p /usr/share/elasticsearch/config/analysis && \
|
||||||
|
pushd /usr/share/elasticsearch/config/analysis && \
|
||||||
|
curl -sSLO https://github.com/uschindler/german-decompounder/raw/master/de_DR.xml && \
|
||||||
|
curl -sSLO https://github.com/uschindler/german-decompounder/raw/master/dictionary-de.txt && \
|
||||||
|
popd
|
Loading…
Reference in a new issue