Browse Source

Merge pull request #122 from ilya-konanykhin/csv-import

CSV import
master
Jason Denney 7 years ago
committed by GitHub
parent
commit
4a30bc5efd
  1. 1
      Gemfile
  2. 2
      Gemfile.lock
  3. 5
      app/models/bike.rb
  4. 160
      app/models/bike_csv_importer.rb
  5. 93
      app/models/bike_csv_importer/bike_attrs.rb
  6. 35
      app/models/bike_csv_importer/cache.rb
  7. 16
      app/models/bike_csv_importer/cleaner.rb
  8. 42
      app/models/bike_csv_importer/logs.rb
  9. 36
      lib/tasks/import.rake

1
Gemfile

@ -35,6 +35,7 @@ group :development, :test do
gem 'factory_girl_rails', '~> 1.2'
gem 'pry', '~> 0.9.8'
gem 'faker', '~> 1.2.0'
gem 'colorize'
end
group :test do

2
Gemfile.lock

@ -69,6 +69,7 @@ GEM
coffee-script-source
execjs
coffee-script-source (1.10.0)
colorize (0.8.1)
database_cleaner (1.2.0)
decent_exposure (1.0.2)
devise (2.0.6)
@ -255,6 +256,7 @@ DEPENDENCIES
cancan
capybara (~> 2.2.1)
coffee-rails (~> 3.2.1)
colorize
database_cleaner (~> 1.2.0)
decent_exposure (~> 1.0.1)
devise (~> 2.0.4)

5
app/models/bike.rb

@ -1,13 +1,14 @@
class Bike < ActiveRecord::Base
acts_as_loggable
attr_accessible :shop_id, :serial_number, :bike_brand_id, :model, :color, :bike_style_id, :seat_tube_height,
:top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo
attr_accessible :shop_id, :serial_number, :bike_brand_id, :bike_model_id, :model, :color, :bike_style_id,
:seat_tube_height, :top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo
has_many :transactions
has_one :owner, :class_name => 'User'
has_one :task_list, :as => :item, :dependent => :destroy
belongs_to :bike_brand
belongs_to :bike_model
belongs_to :bike_style
belongs_to :bike_condition
belongs_to :bike_purpose

160
app/models/bike_csv_importer.rb

@ -0,0 +1,160 @@
require 'csv'
# Imports data from a CSV file into the bikes database.
#
# The first row of the file is treated as the header. Every following row is
# converted into a Hash keyed by the normalized (stripped, down cased) header
# names and passed to the import steps (#run, #analyze, #brands).
class BikeCsvImporter
  include BikeCsvImporter::Cache
  include BikeCsvImporter::Cleaner
  include BikeCsvImporter::BikeAttrs
  include BikeCsvImporter::Logs

  attr_reader :file

  # Default constructor
  #
  # @param [String] file Path to the CSV file
  def initialize(file)
    @file = file
  end

  # Runs the import. Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def run(dry_run)
    imported_count, skipped_count = 0, 0
    # A dry run only validates the records, a live run persists them.
    check_method = dry_run ? :valid? : :save

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import"

    fetch do |bike_hash|
      bike = new_bike bike_hash

      if bike.try check_method
        puts "Imported #{bike.shop_id}: #{bike}".green

        # Log entries are only built for bikes that passed the check above.
        new_logs_entries(bike, bike_hash).each do |log|
          if log.send check_method
            puts "\tLog entry created: #{log.inspect}".green
          else
            puts "\tLog entry creation failed: #{log.errors.full_messages.join '; '}".red
          end
        end

        imported_count += 1
      else
        puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red
        skipped_count += 1
      end
    end

    puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV"
  end

  # Analyzes and prints out the input CSV file values
  #
  # @param [Array<String>] fields If passed, analyze only the given fields (names are down cased)
  def analyze(fields = [])
    puts "Analyzing CSV values frequency for #{fields.any? ? fields.join(', ') + ' field' : 'all fields'}"

    # Header names are down cased by #parse_header, so compare in the same form.
    fields = fields.map(&:downcase)
    grouped = {}

    fetch do |bike_hash|
      bike_hash.each do |key, value|
        next if fields.any? && !fields.include?(key)
        # Per-field counter of how many times each distinct value was seen.
        grouped[key] ||= Hash.new(0)
        grouped[key][value] += 1
      end
    end

    grouped.each do |field, values|
      puts "#{field}:"
      values.each do |value, count|
        puts "\t#{value.inspect}: #{count}"
      end
      puts "\tTotal of #{values.count} distinct values"
    end
  end

  # Imports new brands from CSV file (field 'make'). Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def brands(dry_run)
    created_count, skipped_count = 0, 0
    # A dry run only validates the records, a live run persists them.
    check_method = dry_run ? :valid? : :save

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of brands import"

    fetch do |bike_hash|
      make = clean_value bike_hash['make']
      # Passing true asks for a new, unsaved brand when no existing one is found.
      brand = bike_attr_bike_brand make, true

      if brand.try :persisted?
        puts "Skipped already existing brand #{brand.brand}"
        skipped_count += 1
      elsif brand.try check_method
        puts "Created brand #{brand.brand}".green
        created_count += 1
      else
        puts "Skipped #{brand.try(:brand) || make}: #{brand.try(:errors).try(:full_messages).try(:join, '; ') || 'object not created'}".red
        skipped_count += 1
      end
    end

    puts "#{created_count} brand created, #{skipped_count} brand skipped, total of #{created_count + skipped_count} rows in the CSV"
  end

  private

  # Parses the CSV header & rows, yielding to the block for each row (except the header)
  # Header is down cased!
  #
  # @yieldparam [Hash] bike_hash The row as a hash keyed by header names, @see #parse_bike
  def fetch
    CSV.foreach(file).each_with_index do |row, i|
      if i.zero?
        parse_header row
      else
        yield parse_bike(row)
      end
    end
  end

  # Parses & stores the input header, stripping and down casing the names.
  # Empty header cells are read from the CSV as nil, so they are converted to
  # an empty string instead of failing on nil.
  #
  # @param [Array<String, nil>] row
  #
  # @return [Array<String>]
  def parse_header(row)
    @header = row.map { |name| name.to_s.strip.downcase }
  end

  # Parses the input row into a hash with keys from the header, @see #parse_header
  #
  # @param [Array<String>] row
  #
  # @return [Hash]
  def parse_bike(row)
    @header.zip(row).to_h
  end

  # Constructs a new Bike instance from the given hash from a CSV row
  #
  # @param [Hash] bike_hash
  #
  # @return [Bike]
  def new_bike(bike_hash)
    Bike.new bike_attrs(bike_hash)
  end

  # Constructs new Bike Log Entries instances from the given hash from a CSV row.
  # Builders that return nil (nothing to log for that row) are dropped.
  #
  # @param [Bike] bike The Bike instance to construct log entries for
  # @param [Hash] bike_hash The input hash from a CSV row
  #
  # @return [Array<ActsAsLoggable::Log>]
  def new_logs_entries(bike, bike_hash)
    %i{ acquired comment gone }.map { |x| send :"log_entry_#{x}", bike, bike_hash }.compact
  end
end

93
app/models/bike_csv_importer/bike_attrs.rb

@ -0,0 +1,93 @@
# Helper module to create various Bike instance fields from a CSV row hash
class BikeCsvImporter
  module BikeAttrs
    # Bike purpose names mapped to the pattern that selects them. Entries are
    # checked in order and the first matching pattern wins; an entry with a
    # nil pattern is never selected by matching.
    PURPOSE_PATTERNS = {
      'SALE' => /shop|as(-|\s+)is|safety\s*check/,
      'BUILDBIKE' => /build|bikes.*world/,
      'STORAGE' => nil,
      'PARTS' => /part|frame/,
      'SCRAP' => /scrap|strip/,
    }.freeze

    # Purpose used when no pattern matches the CSV value.
    DEFAULT_PURPOSE = 'UNDETERMINED'.freeze

    # Maps Bike attribute names to the (down cased) CSV column they are read
    # from. A nil column means the attribute gets a fixed value that does not
    # depend on the CSV row.
    #
    # @return [Hash{Symbol => String, nil}]
    def bike_attr_fields
      {
        shop_id: 'velocipede number',
        bike_purpose_id: 'program',
        #gone: 'gone',
        value: 'price',
        bike_brand_id: 'make',
        bike_model_id: 'model',
        model: 'model',
        bike_style_id: nil,
        bike_condition_id: nil,
        seat_tube_height: nil,
        bike_wheel_size_id: nil,
        serial_number: nil,
      }
    end

    # Builds the attributes hash for a Bike by dispatching each field to its
    # bike_attr_<field> method with the cleaned CSV value.
    #
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [Hash{Symbol => Object}]
    def bike_attrs(bike_hash)
      bike_attr_fields.each_with_object({}) do |(model_field, csv_field), memo|
        memo[model_field] = send :"bike_attr_#{model_field}", clean_value(bike_hash[csv_field])
      end
    end

    # @param [String, nil] value
    #
    # @return [Integer] 0 when the value is nil or not numeric
    def bike_attr_shop_id(value)
      value.to_i
    end

    # Picks the bike purpose whose pattern matches the value, falling back to
    # DEFAULT_PURPOSE.
    # NOTE(review): fails on nil if the purpose record is missing - confirm the
    # purposes are always seeded.
    #
    # @param [String, nil] value
    def bike_attr_bike_purpose_id(value)
      test_value = value.try :downcase
      purpose = PURPOSE_PATTERNS.find { |_, regexp| regexp.try :match, test_value }.try :first
      cached_bike_purpose(purpose || DEFAULT_PURPOSE).id
    end

    # @param [String, nil] value
    #
    # @return [Boolean] true for "yes", "yeah" or "y" in any letter case
    def bike_attr_gone(value)
      %w{ yes yeah y }.include? value.try :downcase
    end

    # Converts a price such as "$1,200" into an integer. The currency sign and
    # thousands separators are removed first, otherwise #to_i would stop at
    # the first comma.
    #
    # @param [String, nil] value
    #
    # @return [Integer, nil] nil when there is no value
    def bike_attr_value(value)
      value && value.gsub(/[$,]/, '').to_i
    end

    # Looks up the brand by name; missing or "unknown..." names map to the
    # 'Unknown' brand.
    #
    # @param [String, nil] value
    # @param [Boolean] new_if_empty Passed through to #cached_bike_brand
    def bike_attr_bike_brand(value, new_if_empty = false)
      value = 'Unknown' if !value || value =~ /\Aunknown/i
      cached_bike_brand value, new_if_empty
    end

    # @param [String, nil] value
    #
    # @return [Object, nil] The brand id, nil when no brand was found
    def bike_attr_bike_brand_id(value)
      bike_attr_bike_brand(value, false).try :id
    end

    # @param [String, nil] value
    #
    # @return [Object, nil] The model id, nil when there is no value or no model was found
    def bike_attr_bike_model_id(value)
      return unless value
      cached_bike_model(value).try :id
    end

    # @param [String, nil] value
    #
    # @return [String, nil] The value itself unless it is missing or contains "unknown"
    def bike_attr_model(value)
      value if value && value !~ /unknown/i
    end

    # Fixed value: id of the 'OTHER' style (looked up once, then memoized).
    def bike_attr_bike_style_id(_)
      @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id
    end

    # Fixed value: id of the 'UNDETERMINED' condition (looked up once, then memoized).
    def bike_attr_bike_condition_id(_)
      @bike_condition_undetermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id
    end

    # Fixed value.
    def bike_attr_seat_tube_height(_)
      0
    end

    # Fixed value: id of the 'UNDETERMINED' wheel size (looked up once, then memoized).
    def bike_attr_bike_wheel_size_id(_)
      @bike_wheel_size_undetermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id
    end

    # Fixed value.
    def bike_attr_serial_number(_)
      'UNDETERMINED'
    end
  end
end

35
app/models/bike_csv_importer/cache.rb

@ -0,0 +1,35 @@
# Helper module to create various cached instances for bike CSV imports
class BikeCsvImporter
  module Cache
    # Finds a bike purpose by name, remembering found records.
    #
    # @param [String] purpose
    def cached_bike_purpose(purpose)
      @bike_purpose_cache ||= {}
      @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose
    end

    # Finds a bike brand by name, ignoring letter case. The cache is keyed by
    # the down cased name to match the case-insensitive lookup, so different
    # spellings of one brand share a single query and a single instance.
    #
    # @param [String] brand
    # @param [Boolean] new_if_empty If true and no brand exists, a new unsaved
    #   brand is built (also when an earlier call already cached the miss)
    #
    # @return [BikeBrand, nil]
    def cached_bike_brand(brand, new_if_empty = false)
      @bike_brand_cache ||= {}
      key = brand.downcase

      # has_key? is used so that a miss (nil) is cached as well.
      unless @bike_brand_cache.has_key? key
        @bike_brand_cache[key] = BikeBrand.where('lower(brand) = ?', key).first
      end

      @bike_brand_cache[key] ||= BikeBrand.new(brand: brand) if new_if_empty
      @bike_brand_cache[key]
    end

    # Finds a bike model by name, ignoring letter case. Misses are cached too.
    #
    # @param [String] model
    #
    # @return [BikeModel, nil]
    def cached_bike_model(model)
      @bike_model_cache ||= {}
      key = model.downcase

      unless @bike_model_cache.has_key? key
        @bike_model_cache[key] = BikeModel.where('lower(model) = ?', key).first
      end

      @bike_model_cache[key]
    end

    # Finds a bike log action by name, remembering found records.
    #
    # @param [String] action
    def cached_log_bike_action(action)
      @log_bike_action_id_cache ||= {}
      @log_bike_action_id_cache[action] ||= ActsAsLoggable::BikeAction.find_by_action(action)
    end
  end
end

16
app/models/bike_csv_importer/cleaner.rb

@ -0,0 +1,16 @@
# Helper module to clean the incoming data from CSV fields
class BikeCsvImporter
  module Cleaner
    # Normalizes a raw CSV cell: trims it, removes line breaks and turns
    # placeholder values into nil.
    #
    # @param [String, nil] value
    #
    # @return [String, nil]
    def clean_value(value)
      stripped = strip_value(value)
      value_or_nil(stripped)
    end

    # Trims surrounding whitespace and removes any line break characters.
    #
    # @param [String, nil] value
    #
    # @return [String, nil] nil when the input is nil
    def strip_value(value)
      trimmed = value.try(:strip)
      trimmed.try(:gsub, /\n|\r/, '')
    end

    # Returns nil for values that only mark missing data (compared ignoring
    # letter case), otherwise the value itself.
    #
    # @param [String, nil] value
    #
    # @return [String, nil]
    def value_or_nil(value)
      placeholders = ['?', 'n/a', 'missing', 'unknown', '']
      placeholders.include?(value.try(:downcase)) ? nil : value
    end
  end
end

42
app/models/bike_csv_importer/logs.rb

@ -0,0 +1,42 @@
# Helper module to create ActsAsLoggable log entries for a Bike instance from a CSV row hash
class BikeCsvImporter
  module Logs
    # Builds a 'COMPLETED' log entry when the 'gone' column contains a "y"
    # (in any letter case), dated by the 'date out' column.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when the bike is not marked as gone
    def log_entry_gone(bike, bike_hash)
      return unless clean_value(bike_hash['gone']).to_s =~ /y/i
      log_entry bike, log_entry_date(clean_value(bike_hash['date out'])), 'COMPLETED', 'Gone'
    end

    # Builds an 'ACQUIRED' log entry when the 'date in' column has a value.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when there is no 'date in' value
    def log_entry_acquired(bike, bike_hash)
      date_in = clean_value(bike_hash['date in'])
      return unless date_in
      log_entry bike, log_entry_date(date_in), 'ACQUIRED'
    end

    # Builds a 'NOTE' log entry carrying the 'comment' column as description.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when there is no comment
    def log_entry_comment(bike, bike_hash)
      comment = clean_value(bike_hash['comment'])
      return unless comment.present?
      log_entry bike, nil, 'NOTE', comment
    end

    # Parses a date in month/day/two-digit-year format.
    #
    # @param [String, nil] value
    #
    # @return [Date, nil] nil when there is no value or it cannot be parsed
    def log_entry_date(value)
      return unless value
      Date.strptime value, '%m/%d/%y'
    rescue ArgumentError, TypeError
      # Only parse failures are treated as "no date"; other errors propagate.
      nil
    end

    # Builds (does not save) a log entry for the bike.
    #
    # @param [Bike] bike
    # @param [Date, nil] date Start and end date of the entry, defaults to now
    # @param [String] type Name of the bike action to log
    # @param [String, nil] description
    #
    # @return [ActsAsLoggable::Log]
    #
    # @raise [ArgumentError] if no bike action with the given name is found
    def log_entry(bike, date, type, description = nil)
      date ||= DateTime.now
      bike_action = cached_log_bike_action(type)
      raise ArgumentError, "Bike action #{type.inspect} not found" unless bike_action

      ActsAsLoggable::Log.new(
        loggable_type: bike.class.to_s,
        loggable_id: bike.id || bike.shop_id.to_i, # for dry run
        log_action_type: bike_action.class.to_s,
        log_action_id: bike_action.id,
        start_date: date,
        end_date: date,
        description: description,
      )
    end
  end
end

36
lib/tasks/import.rake

@ -0,0 +1,36 @@
namespace :import do
  namespace :bikes do
    # Imports bikes info from CSV file
    #
    # rake import:bikes:csv[import.csv,dry] # dry run
    # rake import:bikes:csv[import.csv] # live import
    task :csv, [:file, :dry_run] => :environment do |t, args|
      file = args[:file]
      dry_run = args[:dry_run]

      if !file
        puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]"
      elsif !File.readable?(file)
        puts "File #{file} does not exist or is unreachable"
      else
        # Only the literal argument "dry" selects a dry run.
        BikeCsvImporter.new(file).run(dry_run == 'dry')
      end
    end

    # Analyze a single field from CSV file
    #
    # rake import:bikes:analyze_csv[import.csv] # dumps all fields data
    # rake import:bikes:analyze_csv[import.csv,"date in"] # shows only single field
    task :analyze_csv, [:file, :field] => :environment do |t, args|
      file = args[:file]
      field = args[:field]

      if !file
        puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]"
      elsif !File.readable?(file)
        puts "File #{file} does not exist or is unreachable"
      else
        # No field given means all fields are analyzed.
        selected_fields = field ? [field] : []
        BikeCsvImporter.new(file).analyze(selected_fields)
      end
    end

    # Imports new brands from CSV file
    #
    # rake import:bikes:brands_csv[import.csv,dry] # dry run
    # rake import:bikes:brands_csv[import.csv] # live import
    task :brands_csv, [:file, :dry_run] => :environment do |t, args|
      file = args[:file]
      dry_run = args[:dry_run]

      if !file
        puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]"
      elsif !File.readable?(file)
        puts "File #{file} does not exist or is unreachable"
      else
        # Only the literal argument "dry" selects a dry run.
        BikeCsvImporter.new(file).brands(dry_run == 'dry')
      end
    end
  end
end
Loading…
Cancel
Save