From 46de02623428db3da4231e03110c7880f61632af Mon Sep 17 00:00:00 2001 From: Andreas Haller Date: Sun, 17 May 2015 17:49:09 +0200 Subject: [PATCH] Add importer model, validations to paper model --- app/models/importer.rb | 3 +++ app/models/paper.rb | 16 ++++++++++++---- db/migrate/20150413193656_create_papers.rb | 6 ++++++ db/migrate/20150517152218_create_importers.rb | 8 ++++++++ db/schema.rb | 18 +++++++++++++++--- lib/tasks/import_papers.rake | 11 +++++------ test/fixtures/importers.yml | 11 +++++++++++ test/models/importer_test.rb | 7 +++++++ 8 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 app/models/importer.rb create mode 100644 db/migrate/20150517152218_create_importers.rb create mode 100644 test/fixtures/importers.yml create mode 100644 test/models/importer_test.rb diff --git a/app/models/importer.rb b/app/models/importer.rb new file mode 100644 index 0000000..f58649b --- /dev/null +++ b/app/models/importer.rb @@ -0,0 +1,3 @@ +class Importer < ActiveRecord::Base + validates :url, presence: true, uniqueness: true +end diff --git a/app/models/paper.rb b/app/models/paper.rb index 9dbd741..ee05247 100644 --- a/app/models/paper.rb +++ b/app/models/paper.rb @@ -5,20 +5,28 @@ class Paper < ActiveRecord::Base include Elasticsearch::Model include Elasticsearch::Model::Callbacks + validates_presence_of :body, :content, :name, :originator, :paper_type, :published_at, :reference, :url + validates :url, uniqueness: true + class << self def import_from_json(json_string) + old_count = count JSON.parse(json_string).each do |record| attributes = { + body: record['body'], + content: record['content'], name: record['name'], - url: record['url'], - reference: record['reference'], - paper_type: record['paper_type'], + resolution: record['resolution'], originator: record['originator'], + paper_type: record['paper_type'], published_at: record['published_at'], + reference: record['reference'], + url: record['url'], } record = find_or_initialize_by(url: attributes[:url]) - record.update_attributes!(attributes) + record.update_attributes(attributes) end + puts "Imported #{count - old_count} Papers!" end end end diff --git a/db/migrate/20150413193656_create_papers.rb b/db/migrate/20150413193656_create_papers.rb index 591b233..5142855 100644 --- a/db/migrate/20150413193656_create_papers.rb +++ b/db/migrate/20150413193656_create_papers.rb @@ -5,6 +5,7 @@ class CreatePapers < ActiveRecord::Migration t.string :url t.string :reference t.string :name + t.string :body t.datetime :published_at t.datetime :scraped_at t.string :paper_type @@ -14,5 +15,10 @@ class CreatePapers < ActiveRecord::Migration t.timestamps end + + add_index(:papers, :reference) + add_index(:papers, :originator) + add_index(:papers, :body) + add_index(:papers, [:reference, :body], unique: true) end end diff --git a/db/migrate/20150517152218_create_importers.rb b/db/migrate/20150517152218_create_importers.rb new file mode 100644 index 0000000..7426e57 --- /dev/null +++ b/db/migrate/20150517152218_create_importers.rb @@ -0,0 +1,8 @@ +class CreateImporters < ActiveRecord::Migration + def change + create_table :importers do |t| + t.string :url + t.timestamps null: false + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 4d62ad8..bebb6e9 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,12 +11,19 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20150413193656) do +ActiveRecord::Schema.define(version: 20150517152218) do - create_table "papers", force: true do |t| + create_table "importers", force: :cascade do |t| + t.string "url" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + end + + create_table "papers", force: :cascade do |t| t.string "name" t.string "url" t.string "reference" + t.string "body" t.datetime "published_at" t.datetime "scraped_at" t.string "paper_type" @@ -27,7 +34,12 @@ ActiveRecord::Schema.define(version: 20150413193656) do t.datetime "updated_at" end - create_table "users", force: true do |t| + add_index "papers", ["body"], name: "index_papers_on_body" + add_index "papers", ["originator"], name: "index_papers_on_originator" + add_index "papers", ["reference", "body"], name: "index_papers_on_reference_and_body", unique: true + add_index "papers", ["reference"], name: "index_papers_on_reference" + + create_table "users", force: :cascade do |t| t.string "email" t.datetime "created_at" t.datetime "updated_at" diff --git a/lib/tasks/import_papers.rake b/lib/tasks/import_papers.rake index 6ba6f13..31abc45 100644 --- a/lib/tasks/import_papers.rake +++ b/lib/tasks/import_papers.rake @@ -1,11 +1,10 @@ namespace :import_papers do - desc 'Import Paper records from CSV' - task :from_morph => :environment do |t, args| + task :from_morph => :environment do require 'open-uri' - api_key = ENV['MORPH_API_KEY'] - uri = URI.parse "https://api.morph.io/ahx/city_council_leipzig_recent_papers/data.json?key=#{api_key}&query=select%20*%20from%20%27data%27" - puts "Download files from #{uri}…" - Paper.import_from_json(uri.read) + Importer.all.each do |importer| + uri = URI.parse(importer.url) + Paper.import_from_json(uri.read) + end end end diff --git a/test/fixtures/importers.yml b/test/fixtures/importers.yml new file mode 100644 index 0000000..937a0c0 --- /dev/null +++ b/test/fixtures/importers.yml @@ -0,0 +1,11 @@ +# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + +# This model initially had no columns defined. If you add columns to the +# model remove the '{}' from the fixture names and add the columns immediately +# below each fixture, per the syntax in the comments below +# +one: {} +# column: value +# +two: {} +# column: value diff --git a/test/models/importer_test.rb b/test/models/importer_test.rb new file mode 100644 index 0000000..7d2f9e5 --- /dev/null +++ b/test/models/importer_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class ImporterTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end