Mirror of https://github.com/CodeforLeipzig/stadtratmonitor.git, synced 2025-04-20 07:11:33 +02:00
Update elasticsearch
- Build our own Elasticsearch image based on 6.8.6, which includes the configuration files for the German decompounder from https://github.com/uschindler/german-decompounder
- Move ElasticHQ to a separate service and use the official image, instead of including it as a plugin in the Elasticsearch image
- Adjust the compound filter config in papers to use the included hyphenation_decompounder. See https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-compound-word-tokenfilter.html
- Remove the obsolete "string" type from the papers mapping and replace it with "text" or "keyword" accordingly (the existing index has to be recreated and reindexed; see the console sketch below). See https://www.elastic.co/blog/strings-are-dead-long-live-strings
parent 5dad333655, commit dc3a97d3b8
5 changed files with 80 additions and 62 deletions
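Because Elasticsearch does not apply field-type changes to an existing index in place, the papers index has to be dropped and rebuilt after deploying this mapping. A minimal sketch of that step from a Rails console, assuming the app uses the elasticsearch-model gem (which provides the index_name/settings/mappings DSL seen in the diff below):

    # Recreate the index with the new settings and mappings, then reindex
    # all records. `create_index!` and `import` ship with elasticsearch-model;
    # `force: true` deletes the old index before creating the new one.
    Paper.__elasticsearch__.create_index!(force: true)
    Paper.__elasticsearch__.import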
@@ -21,43 +21,47 @@ class Paper < ActiveRecord::Base
   index_name ['srm', Rails.env, self.base_class.to_s.pluralize.underscore].join('_')
 
-  settings index: {
-    number_of_shards: 1,
-    analysis: {
-      filter: {
-        german_stop: {
-          type: "stop",
-          stopwords: "_german_"
-        },
-        german_stemmer: {
-          type: "stemmer",
-          language: "light_german"
-        },
-        decomp: {
-          type: "decompound"
-        }
-      },
-      analyzer: {
-        german: {
-          tokenizer: "standard",
-          filter: [
-            "lowercase",
-            "german_stop",
-            "german_normalization",
-            "german_stemmer",
-            "decomp"
-          ]
-        }
-      }
-    }
+  settings index: {
+    number_of_shards: 1,
+    analysis: {
+      filter: {
+        german_stop: {
+          type: "stop",
+          stopwords: "_german_"
+        },
+        german_stemmer: {
+          type: "stemmer",
+          language: "light_german"
+        },
+        german_decompounder: {
+          type: "hyphenation_decompounder",
+          word_list_path: "analysis/dictionary-de.txt",
+          hyphenation_patterns_path: "analysis/de_DR.xml",
+          only_longest_match: true,
+          min_subword_size: 4
+        },
+      },
+      analyzer: {
+        german: {
+          tokenizer: "standard",
+          filter: [
+            "lowercase",
+            "german_stop",
+            "german_decompounder",
+            "german_normalization",
+            "german_stemmer"
+          ]
+        }
+      }
+    }
   } do
     mappings dynamic: false do
-      indexes :name, type: :string, analyzer: "german"
-      indexes :content, type: :string, analyzer: "german"
-      indexes :resolution, type: :string, analyzer: "german"
-      indexes :reference, type: :string, index: :not_analyzed
-      indexes :paper_type, type: :string, index: :not_analyzed
-      indexes :published_at, type: :date, index: :not_analyzed
-      indexes :originator, type: :string, index: :not_analyzed
+      indexes :name, type: :text, analyzer: "german"
+      indexes :content, type: :text, analyzer: "german"
+      indexes :resolution, type: :text, analyzer: "german"
+      indexes :reference, type: :keyword, index: true
+      indexes :paper_type, type: :keyword, index: true
+      indexes :published_at, type: :date, index: true
+      indexes :originator, type: :keyword, index: true
     end
   end
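The effect of the new filter chain can be spot-checked through the analyze API. A console sketch, assuming the standard elasticsearch-ruby client exposed by elasticsearch-model (the sample compound word is invented):

    # Run the "german" analyzer over a sample compound word. With the
    # hyphenation_decompounder active, subword tokens should appear
    # alongside the full (normalized, stemmed) token.
    client = Paper.__elasticsearch__.client
    response = client.indices.analyze(
      index: Paper.index_name,
      body:  { analyzer: "german", text: "Oberbürgermeister" }
    )
    puts response["tokens"].map { |t| t["token"] }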
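With the mapping split into analyzed text fields and raw keyword fields, full-text queries and exact filters now target different field types. A sketch of what that looks like via elasticsearch-model's search method (the query values are invented):

    # Full-text match on the analyzed :name field, combined with an exact
    # term filter on the :paper_type keyword field. Keyword fields are
    # indexed verbatim, so the filter value must match exactly.
    response = Paper.search(
      query: {
        bool: {
          must:   { match: { name: "Radweg" } },
          filter: { term:  { paper_type: "Antrag" } }
        }
      }
    )
    response.results.map { |r| r.name }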