stadtratmonitor/app/models/paper.rb

76 lines
2.7 KiB
Ruby
Raw Permalink Normal View History

require 'elasticsearch/model'
require 'json'
2015-09-27 13:51:13 +02:00
require 'parseable_date_validator'
2015-04-13 22:09:28 +02:00
class Paper < ActiveRecord::Base
include Elasticsearch::Model
include Elasticsearch::Model::Callbacks
2015-09-27 13:51:13 +02:00
# Descriptive attributes: all required, each with its own maximum length.
validates :name, presence: true, length: { maximum: 1_000 }

# TODO: use a unique index instead of the uniqueness validation.
# NOTE(review): `url: true` presumably resolves to a custom UrlValidator
# defined elsewhere in the project -- confirm.
validates :url,
          presence: true,
          length: { maximum: 1_000 },
          uniqueness: true,
          url: true

validates :reference, presence: true, length: { maximum: 100 }
validates :body, presence: true, length: { maximum: 100 }
validates :content, presence: true, length: { maximum: 100_000 }
validates :originator, presence: true, length: { maximum: 300 }
validates :paper_type, presence: true, length: { maximum: 50 }

# `parseable_date: true` presumably uses the validator loaded via
# `require 'parseable_date_validator'` at the top of this file.
validates :published_at, presence: true, parseable_date: true

# The resolution is optional; only its length is restricted.
validates :resolution, length: { maximum: 30_000 }
2015-06-13 21:18:55 +02:00
# Elasticsearch index configuration: a single shard and an explicit mapping
# (`dynamic: false`) that lists exactly the fields declared below.
settings index: { number_of_shards: 1 } do
  mappings dynamic: false do
    # Free-text fields, indexed with the "german" analyzer.
    %i[name content resolution].each do |field|
      indexes field, type: :string, analyzer: "german"
    end

    # Fields declared as :not_analyzed.
    %i[paper_type originator].each do |field|
      indexes field, type: :string, index: :not_analyzed
    end
  end
end
2015-06-13 21:18:55 +02:00
2015-06-22 23:51:36 +02:00
# Splits an enumerated originator string such as "1. Foo 2. Bar" into its
# individual entries.
#
# The string is cut at every "<digits>. " marker (one or more digits, so
# "10. " does not leave a stray "1" behind), surrounding whitespace is
# stripped from each entry and empty entries are dropped. If nothing is left
# after splitting, the unsplit originator is used as the only entry; a nil
# originator yields an empty Array.
#
# @return [Array<String>] the individual originators
def split_originator
  parts = originator.to_s.split(/\d+\.\s/).map(&:strip).reject(&:empty?)
  # An empty Array is truthy in Ruby, so the fallback has to test for
  # emptiness explicitly; `parts || originator` would never fall back.
  parts.empty? ? Array(originator) : parts
end
# Builds the document that is indexed for this record.
#
# Takes the hash returned by #as_json and replaces its "originator" entry
# with the list returned by #split_originator. The entry is merged under a
# String key: ActiveModel's #as_json produces String keys, so merging under
# the Symbol :originator would keep the original entry as well and emit the
# field twice once the hash is serialized.
#
# @param options [Hash] accepted for interface compatibility; not used
# @return [Hash] the JSON-ready document
def as_indexed_json(options = {})
  as_json.merge('originator' => split_originator)
end
2015-04-13 22:09:28 +02:00
class << self
# Creates or updates Papers from a JSON export.
#
# Every element of the parsed JSON array is matched to an existing Paper by
# its "url"; a new Paper is initialized when none is found. Keys missing
# from an element are assigned nil. Elements that cannot be saved are
# reported on stderr and skipped. The number of newly created Papers is
# printed to stdout at the end.
#
# @param json_string [String] JSON array of paper objects
# @return [nil]
# @raise [JSON::ParserError] if json_string is not valid JSON
def import_from_json(json_string)
  old_count = count
  JSON.parse(json_string).each do |entry|
    attributes = {
      body: entry['body'],
      content: entry['content'],
      name: entry['name'],
      resolution: entry['resolution'],
      originator: entry['originator'],
      paper_type: entry['paper_type'],
      published_at: entry['published_at'],
      reference: entry['reference'],
      url: entry['url']
    }
    paper = find_or_initialize_by(url: attributes[:url])
    # `update` returns false when the record could not be saved; report it
    # instead of dropping the entry silently.
    next if paper.update(attributes)

    warn "Could not import Paper #{attributes[:url].inspect}: " \
         "#{paper.errors.full_messages.join(', ')}"
  end
  puts "Imported #{count - old_count} Papers!"
end
# Logs the given search definition as JSON at debug level and forwards it to
# the Elasticsearch proxy.
#
# Search definitions are meant to be built with the Elasticsearch DSL, see
# https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch-dsl
# and
# https://github.com/elastic/elasticsearch-rails/tree/master/elasticsearch-rails/lib/rails/templates
#
# @param search_definition [#to_json] the query to run
# @return whatever `__elasticsearch__.search` returns
def search(search_definition)
  serialized_query = search_definition.to_json
  Rails.logger.debug("Query: #{serialized_query}")
  __elasticsearch__.search(search_definition)
end
2015-06-13 21:18:55 +02:00
# Recreates the Elasticsearch index (`create_index!` with `force: true`) and
# then indexes every Paper again, one document at a time.
#
# Records are read with `find_each`, i.e. in batches, so the whole table is
# not loaded into memory at once.
#
# @return [nil]
def reset_index!
  __elasticsearch__.create_index! force: true
  find_each { |paper| paper.__elasticsearch__.index_document }
end
2015-04-13 22:09:28 +02:00
end
end