From 5c96de3dedc0e208d119004048af809865ca0e77 Mon Sep 17 00:00:00 2001 From: Joerg Reichert Date: Thu, 29 Dec 2016 18:22:10 +0100 Subject: [PATCH] 28 - use elasticsearch-analysis-decompound plugin to support finding results within compound words --- .travis.yml | 1 + app/models/paper.rb | 33 +++++++++++++++++++++++++++++++-- docker-compose.yml | 3 +++ docker-entrypoint-es-plugins.sh | 5 +++++ 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 docker-entrypoint-es-plugins.sh diff --git a/.travis.yml b/.travis.yml index 73cfa9f..22714ad 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,5 +7,6 @@ services: - docker script: + - chmod +x docker-entrypoint-es-plugins.sh - docker-compose run web rake db:setup - docker-compose run -e CI=true -e TRAVIS=true -e TRAVIS_BRANCH=$TRAVIS_BRANCH -e TRAVIS_COMMIT=$TRAVIS_COMMIT -e TRAVIS_JOB_NUMBER=$TRAVIS_JOB_NUMBER -e TRAVIS_PULL_REQUEST=$TRAVIS_PULL_REQUEST -e TRAVIS_JOB_ID=$TRAVIS_JOB_ID -e TRAVIS_REPO_SLUG=$TRAVIS_REPO_SLUG web bin/run-tests diff --git a/app/models/paper.rb b/app/models/paper.rb index 77ab1d3..d01bae2 100644 --- a/app/models/paper.rb +++ b/app/models/paper.rb @@ -21,11 +21,40 @@ class Paper < ActiveRecord::Base index_name ['srm', Rails.env, self.base_class.to_s.pluralize.underscore].join('_') - settings index: { number_of_shards: 1 } do - mappings dynamic: false do + settings index: { + number_of_shards: 1, + analysis: { + filter: { + german_stop: { + type: "stop", + stopwords: "_german_" + }, + german_stemmer: { + type: "stemmer", + language: "light_german" + }, + decomp: { + type: "decompound" + } + }, + analyzer: { + german: { + tokenizer: "standard", + filter: [ + "lowercase", + "german_stop", + "german_normalization", + "german_stemmer", + "decomp" + ] + } + } + } + } do mappings dynamic: false do indexes :name, type: :string, analyzer: "german" indexes :content, type: :string, analyzer: "german" indexes :resolution, type: :string, analyzer: "german" + indexes :reference, type: :string, index: :not_analyzed indexes :paper_type, type: :string, index: :not_analyzed indexes :published_at, type: :date, index: :not_analyzed indexes :originator, type: :string, index: :not_analyzed diff --git a/docker-compose.yml b/docker-compose.yml index 767a382..0634c77 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,3 +10,6 @@ web: ELASTICSEARCH_URL: 'http://elasticsearch:9200' elasticsearch: image: elasticsearch:1.7 + volumes: + - .:/apps + entrypoint: /apps/docker-entrypoint-es-plugins.sh diff --git a/docker-entrypoint-es-plugins.sh b/docker-entrypoint-es-plugins.sh new file mode 100644 index 0000000..03dc649 --- /dev/null +++ b/docker-entrypoint-es-plugins.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +plugin -install elasticsearch-analysis-decompound --url http://xbib.org/repository/org/xbib/elasticsearch/plugin/elasticsearch-analysis-decompound/1.7.1.3/elasticsearch-analysis-decompound-1.7.1.3-plugin.zip + +exec /docker-entrypoint.sh elasticsearch