require 'elasticsearch/model' require 'json' require 'parseable_date_validator' class Paper < ActiveRecord::Base include Elasticsearch::Model include Elasticsearch::Model::Callbacks validates :name, presence: true, length: { maximum: 1000 } validates :url, presence: true, length: { maximum: 1000 }, uniqueness: true, # TODO use unique index instead url: true validates :reference, presence: true, length: { maximum: 100 } validates :body, presence: true, length: { maximum: 100 } validates :content, presence: true, length: { maximum: 100_000 } validates :originator, presence: true, length: { maximum: 300 } validates :paper_type, presence: true, length: { maximum: 50 } validates :published_at, presence: true, parseable_date: true validates :resolution, length: { maximum: 30_000 } index_name ['srm', Rails.env, self.base_class.to_s.pluralize.underscore].join('_') settings index: { number_of_shards: 1, analysis: { filter: { german_stop: { type: "stop", stopwords: "_german_" }, german_stemmer: { type: "stemmer", language: "light_german" }, german_decompounder: { type: "hyphenation_decompounder", word_list_path: "analysis/dictionary-de.txt", hyphenation_patterns_path: "analysis/de_DR.xml", only_longest_match: true, min_subword_size: 4 }, }, analyzer: { german: { tokenizer: "standard", filter: [ "lowercase", "german_stop", "german_decompounder", "german_normalization", "german_stemmer" ] } } } } do mappings dynamic: false do indexes :name, type: :text, analyzer: "german" indexes :content, type: :text, analyzer: "german" indexes :resolution, type: :text, analyzer: "german" indexes :reference, type: :keyword, index: true indexes :paper_type, type: :keyword, index: true indexes :published_at, type: :date, index: true indexes :originator, type: :keyword, index: true end end def split_originator originator.split(/\d\.\s/).reject {|s| s.blank?} || originator end def as_indexed_json(options={}) as_json.merge(originator: split_originator) end class << self def import_from_json(json_string) old_count = count JSON.parse(json_string).each do |record| attributes = { body: record['body'], content: record['content'], name: record['name'], resolution: record['resolution'], originator: record['originator'], paper_type: record['paper_type'], published_at: record['published_at'], reference: record['reference'], url: record['url'], } record = find_or_initialize_by(url: attributes[:url]) record.update_attributes(attributes) end puts "Imported #{count - old_count} Papers!" end # use DSL to define search queries # see https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl # and https://github.com/elastic/elasticsearch-rails/tree/master/elasticsearch-rails/lib/rails/templates def search(search_definition) Rails.logger.debug "Query: #{search_definition.to_json}" __elasticsearch__.search(search_definition) end def reset_index! __elasticsearch__.create_index! force: true all.each {|p| p.__elasticsearch__.index_document } end end end