stadtratmonitor/app/models/paper.rb

129 lines
4.3 KiB
Ruby
Raw Normal View History

require 'elasticsearch/model'
require 'json'
2015-09-27 13:51:13 +02:00
require 'parseable_date_validator'
2015-04-13 22:09:28 +02:00
class Paper < ActiveRecord::Base
include Elasticsearch::Model
include Elasticsearch::Model::Callbacks
2015-09-27 13:51:13 +02:00
# Attribute validations. Every attribute except +resolution+ is mandatory;
# the length limits cap each attribute at the maximum given below.
validates :name,
          presence: true,
          length: { maximum: 1000 }

# The URL must be unique (it is the key used by import_from_json to find
# existing papers) and must pass the `url` validator, which is presumably a
# custom validator defined elsewhere in the project.
# TODO use unique index instead
validates :url,
          presence: true,
          length: { maximum: 1000 },
          uniqueness: true,
          url: true

validates :reference,
          presence: true,
          length: { maximum: 100 }

validates :body,
          presence: true,
          length: { maximum: 100 }

validates :content,
          presence: true,
          length: { maximum: 100_000 }

validates :originator,
          presence: true,
          length: { maximum: 300 }

validates :paper_type,
          presence: true,
          length: { maximum: 50 }

# The publication date is checked with the `parseable_date` validator.
validates :published_at,
          presence: true,
          parseable_date: true

# Optional attribute: only its length is restricted.
validates :resolution,
          length: { maximum: 30_000 }
2015-06-13 21:18:55 +02:00
# Elasticsearch index configuration: one shard and an explicit mapping with
# `dynamic: false`, so only the fields declared below are mapped.
settings index: { number_of_shards: 1 } do
  mappings dynamic: false do
    # Full-text fields, analyzed with the "german" analyzer.
    %i[name content resolution].each do |text_field|
      indexes text_field, type: :string, analyzer: "german"
    end

    # Exact-value fields, indexed without analysis; the search method uses
    # them for term filters and terms aggregations.
    %i[paper_type originator].each do |exact_field|
      indexes exact_field, type: :string, index: :not_analyzed
    end
  end
end
2015-06-13 21:18:55 +02:00
2015-06-22 23:51:36 +02:00
# Splits the originator attribute into its individual originators.
#
# Several originators may be stored in one string as an enumerated list,
# e.g. "1. Fraktion A 2. Fraktion B". The string is split on the enumeration
# markers (one or more digits, a dot and one whitespace character); each
# piece is stripped of surrounding whitespace and empty pieces are dropped.
#
# Returns an Array of Strings:
# * the individual originators when the split yields any,
# * otherwise the stripped original value as a single-element array,
# * or an empty array when originator is nil or blank.
def split_originator
  raw = originator.to_s

  # `\d+` (not `\d`) so that markers such as "10." are consumed completely
  # instead of leaving a stray "1" entry behind.
  parts = raw.split(/\d+\.\s/).map(&:strip).reject(&:empty?)
  return parts unless parts.empty?

  # Nothing usable came out of the split: fall back to the original value.
  fallback = raw.strip
  fallback.empty? ? [] : [fallback]
end
# Builds the document that is sent to Elasticsearch for this paper.
#
# The document is the record's as_json representation (without a root
# element) in which the "originator" entry is replaced by the list returned
# from split_originator.
#
# options - Hash of serialization options, passed on to as_json.
#
# Returns a Hash with String keys.
def as_indexed_json(options = {})
  # The override must use the String key "originator": a Symbol key would be
  # added next to the existing String key instead of replacing it, leaving
  # two "originator" entries in the document.
  as_json(options.merge(root: false)).merge('originator' => split_originator)
end
2015-04-13 22:09:28 +02:00
class << self
# Imports papers from a JSON document.
#
# The document must be a JSON array of objects. For each object the paper
# with the same "url" is looked up (or a new one is initialized) and all
# imported attributes are assigned from the object; attributes missing from
# the object are set to nil. Entries that cannot be saved are reported on
# standard error and skipped, so one invalid entry does not abort the import.
#
# json_string - String containing the JSON array.
#
# Prints the number of newly created papers to standard output.
# Returns nil. Raises JSON::ParserError if json_string is not valid JSON.
def import_from_json(json_string)
  imported_keys = %i[body content name resolution originator paper_type
                     published_at reference url]
  count_before = count

  JSON.parse(json_string).each do |entry|
    attributes = imported_keys.each_with_object({}) do |key, attrs|
      attrs[key] = entry[key.to_s]
    end

    paper = find_or_initialize_by(url: attributes[:url])
    next if paper.update(attributes)

    # update returned false: the record did not pass validation.
    warn "Could not import Paper #{attributes[:url].inspect}: " \
         "#{paper.errors.full_messages.join(', ')}"
  end

  puts "Imported #{count - count_before} Papers!"
end
# use DSL to define search queries
# see https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl
# and https://github.com/elastic/elasticsearch-rails/tree/master/elasticsearch-rails/lib/rails/templates
2015-06-22 20:38:24 +02:00
# Runs a faceted full-text search over the indexed papers.
#
# q       - The search phrase. It is matched against the "name" and "content"
#           fields; when blank, all papers match.
# options - Hash of optional facet filters:
#           :paper_type - restrict the hits to this paper type
#           :originator - restrict the hits to this originator
#           Blank filter values are treated as "not set".
#
# The facet filters are applied as a post filter, i.e. after the
# aggregations are computed. Each aggregation is instead restricted by the
# *other* facet's filter only: :paper_types by the originator filter,
# :originators by the paper type filter.
#
# Returns the result of __elasticsearch__.search for the built definition.
def search(q, options = {})
  # Normalize the filters once; nil means "no filter". The DSL blocks below
  # read these locals through their closures.
  paper_type_filter = options[:paper_type].presence
  originator_filter = options[:originator].presence

  # Kept in a local variable so concurrent calls do not share state through
  # a class-level instance variable.
  search_definition = Elasticsearch::DSL::Search.search do
    query do
      if q.present?
        multi_match do
          query q
          fields ["name", "content"]
        end
      else
        match_all
      end
    end

    # apply filter after aggregations
    post_filter do
      bool do
        must { term paper_type: paper_type_filter } if paper_type_filter
        must { term originator: originator_filter } if originator_filter
        # Catch-all when no filter is effectively set. This is decided by the
        # filter values (not by the mere presence of the option keys), so a
        # blank filter value never results in an empty bool filter.
        must { match_all } unless paper_type_filter || originator_filter
      end
    end

    aggregation :paper_types do
      # filter by originator
      f = Elasticsearch::DSL::Search::Filters::Bool.new
      f.must { match_all }
      f.must { term originator: originator_filter } if originator_filter
      filter f.to_hash do
        aggregation :paper_types do
          terms do
            field 'paper_type'
          end
        end
      end
    end

    aggregation :originators do
      # filter by paper_type
      f = Elasticsearch::DSL::Search::Filters::Bool.new
      f.must { match_all }
      f.must { term paper_type: paper_type_filter } if paper_type_filter
      filter f.to_hash do
        aggregation :originators do
          terms do
            field 'originator'
          end
        end
      end
    end
  end

  # Block form: the JSON is only generated when debug logging is enabled.
  Rails.logger.debug { "Query: #{search_definition.to_json}" }
  __elasticsearch__.search(search_definition)
end
2015-06-13 21:18:55 +02:00
# Recreates the Elasticsearch index and indexes every paper again.
#
# The index is created with `force: true` and then each paper is indexed
# individually. Papers are loaded in batches via find_each so the whole
# table is never held in memory at once.
#
# Returns nil.
def reset_index!
  __elasticsearch__.create_index! force: true
  find_each { |paper| paper.__elasticsearch__.index_document }
end
2015-04-13 22:09:28 +02:00
end
end