mirror of https://github.com/fspc/BikeShed-1.git
Jason Denney
8 years ago
committed by
GitHub
9 changed files with 388 additions and 2 deletions
@ -0,0 +1,160 @@ |
|||
require 'csv' |
|||
|
|||
# Imports data from CSV file into the bikes database.
#
# The first CSV row is treated as the header; every following row is turned into a hash keyed by
# the normalized (stripped, down cased) header names and handed to the import/analysis methods.
class BikeCsvImporter
  include BikeCsvImporter::Cache
  include BikeCsvImporter::Cleaner
  include BikeCsvImporter::BikeAttrs
  include BikeCsvImporter::Logs

  attr_reader :file

  # Default constructor
  #
  # @param [String] file Path to the CSV file
  def initialize(file)
    @file = file
  end

  # Runs the import. Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def run(dry_run)
    imported_count = 0
    skipped_count = 0
    # A dry run only validates the records, a live run saves them.
    check_method = dry_run ? :valid? : :save

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import"

    fetch do |bike_hash|
      bike = new_bike bike_hash

      if bike.try check_method
        puts "Imported #{bike.shop_id}: #{bike}".green

        # Log entries are only built for bikes that passed the check above.
        new_logs_entries(bike, bike_hash).each do |log|
          if log.send check_method
            puts "\tLog entry created: #{log.inspect}".green
          else
            puts "\tLog entry creation failed: #{log.errors.full_messages.join '; '}".red
          end
        end

        imported_count += 1
      else
        puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red
        skipped_count += 1
      end
    end

    puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV"
  end

  # Analyzes and prints out the input CSV file values
  #
  # @param [Array<String>] fields If passed, analyze only the given fields (names are down cased)
  def analyze(fields = [])
    fields = Array(fields)
    puts "Analyzing CSV values frequency for #{fields.any? ? fields.join(', ') + ' field' : 'all fields'}"

    # Normalize the requested names the same way #parse_header normalizes the CSV header.
    wanted = fields.map { |name| name.to_s.strip.downcase }
    grouped = Hash.new { |by_field, field| by_field[field] = Hash.new(0) }

    fetch do |bike_hash|
      bike_hash.each do |key, value|
        next if wanted.any? && !wanted.include?(key)
        grouped[key][value] += 1
      end
    end

    grouped.each do |field, values|
      puts "#{field}:"
      values.each do |value, count|
        puts "\t#{value.inspect}: #{count}"
      end
      puts "\tTotal of #{values.count} distinct values"
    end
  end

  # Imports new brands from CSV file (field 'make'). Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def brands(dry_run)
    created_count = 0
    skipped_count = 0
    check_method = dry_run ? :valid? : :save
    # Brand instances already reported as created during this run. In a dry run nothing is saved,
    # so #persisted? stays false and a brand repeated on later rows would otherwise be counted as
    # created again, unlike in a live run.
    accepted = {}.compare_by_identity

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of brands import"

    fetch do |bike_hash|
      make = clean_value bike_hash['make']
      brand = bike_attr_bike_brand make, true

      if brand.try(:persisted?) || accepted.key?(brand)
        puts "Skipped already existing brand #{brand.brand}"
        skipped_count += 1
      elsif brand.try check_method
        puts "Created brand #{brand.brand}".green
        accepted[brand] = true
        created_count += 1
      else
        puts "Skipped #{brand.try(:brand) || make}: #{brand.try(:errors).try(:full_messages).try(:join, '; ') || 'object not created'}".red
        skipped_count += 1
      end
    end

    puts "#{created_count} brand created, #{skipped_count} brand skipped, total of #{created_count + skipped_count} rows in the CSV"
  end

  private

  # Parses the CSV header & rows, yielding a block for each row (except the header)
  # Header is down cased!
  #
  # @yieldparam [Hash] bike_hash The row as a hash keyed by header names, @see #parse_bike
  def fetch
    CSV.foreach(file).each_with_index do |row, i|
      if i.zero?
        parse_header row
      else
        yield parse_bike(row)
      end
    end
  end

  # Parses & stores the input header, stripping and down casing the names by the way.
  # A blank header cell (nil from the CSV parser) becomes an empty string instead of raising.
  #
  # @param [Array<String, nil>] row
  def parse_header(row)
    @header = row.map { |name| name.to_s.strip.downcase }
  end

  # Parses the input row into a hash with keys from the header, @see #parse_header
  # Cells missing at the end of a short row end up as nil values.
  #
  # @param [Array<String>] row
  #
  # @return [Hash]
  def parse_bike(row)
    @header.zip(row).to_h
  end

  # Constructs a new Bike instance from the given hash from a CSV row
  #
  # @param [Hash] bike_hash
  #
  # @return [Bike]
  def new_bike(bike_hash)
    Bike.new bike_attrs(bike_hash)
  end

  # Constructs new Bike Log Entries instances from the given hash from a CSV row.
  # Calls log_entry_acquired, log_entry_comment and log_entry_gone, dropping the nil results.
  #
  # @param [Bike] bike The Bike instance to construct log entries for
  # @param [Hash] bike_hash The input hash from a CSV row
  #
  # @return [Array<ActsAsLoggable::Log>]
  def new_logs_entries(bike, bike_hash)
    %i{ acquired comment gone }.map { |x| send :"log_entry_#{x}", bike, bike_hash }.compact
  end
end
@ -0,0 +1,93 @@ |
|||
# Helper module to create various Bike instance fields from a CSV row hash
class BikeCsvImporter
  module BikeAttrs
    # Bike purpose names paired with the pattern a down cased 'program' value must match.
    # The first matching entry wins; an entry with a nil pattern is never picked.
    PURPOSE_PATTERNS = {
      'SALE' => /shop|as(-|\s+)is|safety\s*check/,
      'BUILDBIKE' => /build|bikes.*world/,
      'STORAGE' => nil,
      'PARTS' => /part|frame/,
      'SCRAP' => /scrap|strip/,
    }.freeze

    # Purpose used when no pattern matches (or the value is empty).
    DEFAULT_PURPOSE = 'UNDETERMINED'

    # Maps each Bike attribute to the (down cased) CSV column its value is built from.
    # A nil column means no CSV value is read; the matching bike_attr_* method ignores its input.
    #
    # @return [Hash{Symbol => String, nil}]
    def bike_attr_fields
      {
        shop_id: 'velocipede number',
        bike_purpose_id: 'program',
        #gone: 'gone',
        value: 'price',
        bike_brand_id: 'make',
        bike_model_id: 'model',
        model: 'model',
        bike_style_id: nil,
        bike_condition_id: nil,
        seat_tube_height: nil,
        bike_wheel_size_id: nil,
        serial_number: nil,
      }
    end

    # Builds the attributes hash for a new Bike by running every cleaned CSV value through the
    # bike_attr_<attribute> method of the same name.
    #
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [Hash{Symbol => Object}]
    def bike_attrs(bike_hash)
      bike_attr_fields.each_with_object({}) do |(model_field, csv_field), memo|
        memo[model_field] = send :"bike_attr_#{model_field}", clean_value(bike_hash[csv_field])
      end
    end

    # @return [Integer] the value converted with #to_i (0 for nil)
    def bike_attr_shop_id(value)
      value.to_i
    end

    # Picks the bike purpose whose pattern matches the value, falling back to DEFAULT_PURPOSE.
    #
    # @return [Object] id of the purpose record returned by cached_bike_purpose
    def bike_attr_bike_purpose_id(value)
      test_value = value.try :downcase
      purpose = PURPOSE_PATTERNS.find { |_, regexp| regexp.try :match, test_value }.try :first

      cached_bike_purpose(purpose || DEFAULT_PURPOSE).id
    end

    # @return [Boolean] true when the value is one of yes / yeah / y (any case)
    def bike_attr_gone(value)
      %w{ yes yeah y }.include? value.try :downcase
    end

    # Converts a price cell to an integer. The dollar sign and thousands separators are removed
    # first, otherwise "$1,200" would be cut at the comma and read as 1.
    #
    # @return [Integer, nil] nil when there is no value
    def bike_attr_value(value)
      value.try(:delete, '$,').try :to_i
    end

    # Looks up the brand for a 'make' value; empty values and values starting with "unknown"
    # are mapped to the 'Unknown' brand.
    #
    # @param [String, nil] value
    # @param [Boolean] new_if_empty Passed through to cached_bike_brand
    def bike_attr_bike_brand(value, new_if_empty = false)
      value = 'Unknown' if !value || value =~ /\Aunknown/i
      cached_bike_brand value, new_if_empty
    end

    # @return [Object, nil] id of the brand, nil when cached_bike_brand returns no brand
    def bike_attr_bike_brand_id(value)
      bike_attr_bike_brand(value, false).try :id
    end

    # @return [Object, nil] id of the model returned by cached_bike_model, nil without a value
    def bike_attr_bike_model_id(value)
      return unless value
      cached_bike_model(value).try :id
    end

    # @return [String, nil] the model name, nil when empty or containing "unknown"
    def bike_attr_model(value)
      value if value && value !~ /unknown/i
    end

    # Always the 'OTHER' bike style; the CSV value is ignored. The id is looked up once.
    def bike_attr_bike_style_id(_)
      @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id
    end

    # Always the 'UNDETERMINED' bike condition; the CSV value is ignored. The id is looked up once.
    def bike_attr_bike_condition_id(_)
      @bike_condition_undetermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id
    end

    # Always 0; the CSV value is ignored.
    def bike_attr_seat_tube_height(_)
      0
    end

    # Always the 'UNDETERMINED' wheel size; the CSV value is ignored. The id is looked up once.
    def bike_attr_bike_wheel_size_id(_)
      @bike_wheel_size_undetermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id
    end

    # Always 'UNDETERMINED'; the CSV value is ignored.
    def bike_attr_serial_number(_)
      'UNDETERMINED'
    end
  end
end
@ -0,0 +1,35 @@ |
|||
# Helper module to create various cached instances for bike CSV imports
class BikeCsvImporter
  module Cache
    # Finds a bike purpose by its name, remembering every record that was found.
    #
    # @param [String] purpose
    #
    # @return [BikePurpose, nil]
    def cached_bike_purpose(purpose)
      @bike_purpose_cache ||= {}
      @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose
    end

    # Finds a bike brand by name, ignoring case, optionally building a new (unsaved) one.
    #
    # The cache is keyed by the down cased name so that it agrees with the case-insensitive
    # lookup: "Trek" and "TREK" share one entry and one instance. A miss remembered by a call
    # with new_if_empty = false does not prevent a later new_if_empty = true call from building
    # the brand.
    #
    # @param [String] brand
    # @param [Boolean] new_if_empty If true, a missing brand is built with BikeBrand.new
    #
    # @return [BikeBrand, nil]
    def cached_bike_brand(brand, new_if_empty = false)
      @bike_brand_cache ||= {}
      key = brand.downcase

      cached = @bike_brand_cache[key]
      return cached if cached
      # A remembered miss is only final when the caller does not want a new instance.
      return nil if @bike_brand_cache.has_key?(key) && !new_if_empty

      bike_brand = BikeBrand.where('lower(brand) = ?', key).first
      bike_brand ||= BikeBrand.new(brand: brand) if new_if_empty

      @bike_brand_cache[key] = bike_brand
    end

    # Finds a bike model by name, ignoring case. Misses are remembered too, so every distinct
    # (down cased) name is looked up only once.
    #
    # @param [String] model
    #
    # @return [BikeModel, nil]
    def cached_bike_model(model)
      @bike_model_cache ||= {}
      key = model.downcase

      if @bike_model_cache.has_key? key
        @bike_model_cache[key]
      else
        @bike_model_cache[key] = BikeModel.where('lower(model) = ?', key).first
      end
    end

    # Finds a bike log action by its name, remembering every record that was found.
    #
    # @param [String] action
    #
    # @return [ActsAsLoggable::BikeAction, nil]
    def cached_log_bike_action(action)
      @log_bike_action_id_cache ||= {}
      @log_bike_action_id_cache[action] ||= ActsAsLoggable::BikeAction.find_by_action(action)
    end
  end
end
@ -0,0 +1,16 @@ |
|||
# Helper module to clean the incoming data from CSV fields
class BikeCsvImporter
  module Cleaner
    # Cell contents (compared down cased) that stand for "no value".
    EMPTY_VALUE_MARKERS = ['?', 'n/a', 'missing', 'unknown', ''].freeze

    # Strips the value and turns "no value" markers into nil.
    #
    # @param [String, nil] value Raw CSV cell
    #
    # @return [String, nil]
    def clean_value(value)
      value_or_nil strip_value(value)
    end

    # Removes surrounding whitespace, then any line breaks left inside the value.
    #
    # @param [String, nil] value
    #
    # @return [String, nil] nil when there is no value
    def strip_value(value)
      value.try(:strip).try(:gsub, /\n|\r/, '')
    end

    # Returns the value unless it is one of EMPTY_VALUE_MARKERS (any case), in which case nil.
    #
    # @param [String, nil] value
    #
    # @return [String, nil]
    def value_or_nil(value)
      value unless EMPTY_VALUE_MARKERS.include? value.try(:downcase)
    end
  end
end
@ -0,0 +1,42 @@ |
|||
# Helper module to create ActsAsLoggable log entries for a Bike instance from a CSV row hash
class BikeCsvImporter
  module Logs
    # Builds the 'COMPLETED' log entry for a bike whose 'gone' column says yes.
    # Only values starting with "y" (y, yes, yeah, ...) count; matching a "y" anywhere would
    # also accept values such as "layaway" or "maybe".
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when the bike is not gone
    def log_entry_gone(bike, bike_hash)
      return unless clean_value(bike_hash['gone']).to_s =~ /\Ay/i
      log_entry bike, log_entry_date(clean_value(bike_hash['date out'])), 'COMPLETED', 'Gone'
    end

    # Builds the 'ACQUIRED' log entry when the row has a 'date in' value.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when there is no 'date in' value
    def log_entry_acquired(bike, bike_hash)
      date_in = clean_value bike_hash['date in']
      return unless date_in
      log_entry bike, log_entry_date(date_in), 'ACQUIRED'
    end

    # Builds the 'NOTE' log entry carrying the row's comment, if any.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil] nil when there is no comment
    def log_entry_comment(bike, bike_hash)
      comment = clean_value bike_hash['comment']
      return unless comment.present?
      log_entry bike, nil, 'NOTE', comment
    end

    # Parses a month/day/year date. A four digit year is parsed with %Y; applying %y to it
    # would only consume the first two digits of the year.
    #
    # @param [String, nil] value
    #
    # @return [Date, nil] nil when there is no value or it is not a valid date
    def log_entry_date(value)
      return unless value
      text = value.to_s
      format = text =~ %r{/\d{4}\z} ? '%m/%d/%Y' : '%m/%d/%y'
      Date.strptime text, format
    rescue ArgumentError
      # Unparseable dates are treated as missing; #log_entry then falls back to the current time.
      nil
    end

    # Builds (does not save) a log entry for the bike.
    #
    # @param [Bike] bike
    # @param [Date, nil] date Used as both start and end date; the current time when nil
    # @param [String] type Name of the bike action, looked up with cached_log_bike_action
    # @param [String, nil] description
    #
    # @return [ActsAsLoggable::Log]
    #
    # @raise [ArgumentError] when no bike action with the given name is found
    def log_entry(bike, date, type, description = nil)
      date ||= DateTime.now
      bike_action = cached_log_bike_action(type)
      raise ArgumentError, "Bike action #{type.inspect} not found" unless bike_action

      ActsAsLoggable::Log.new(
        loggable_type: bike.class.to_s,
        loggable_id: bike.id || bike.shop_id.to_i, # for dry run
        log_action_type: bike_action.class.to_s,
        log_action_id: bike_action.id,
        start_date: date,
        end_date: date,
        description: description,
      )
    end
  end
end
@ -0,0 +1,36 @@ |
|||
namespace :import do
  namespace :bikes do
    # Checks the CSV path passed to a task. Prints the usage line when the path is missing, or an
    # error when the file cannot be read. Returns true only when the file can be read.
    csv_ready = lambda do |file, usage|
      if !file
        puts usage
        false
      elsif !File.readable?(file)
        puts "File #{file} does not exist or is unreachable"
        false
      else
        true
      end
    end

    # Interprets the optional dry_run task argument: true for 'dry', false when it is omitted.
    # Any other value prints an error plus the usage line and returns nil, so that a mistyped
    # option (e.g. 'DRY' or 'true') does not silently start a live import.
    dry_run_mode = lambda do |raw, usage|
      if raw.nil? || raw.empty?
        false
      elsif raw == 'dry'
        true
      else
        puts "Unknown option #{raw.inspect}, pass 'dry' for a dry run or nothing for a live import"
        puts usage
        nil
      end
    end

    # Imports bikes info from CSV file
    #
    # rake import:bikes:csv[import.csv,dry] # dry run
    # rake import:bikes:csv[import.csv] # live import
    task :csv, [:file, :dry_run] => :environment do |t, args|
      file, dry_run_arg = args.values_at :file, :dry_run
      usage = "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]"
      next unless csv_ready.call(file, usage)

      dry_run = dry_run_mode.call(dry_run_arg, usage)
      next if dry_run.nil?

      BikeCsvImporter.new(file).run dry_run
    end

    # Analyze a single field from CSV file
    #
    # rake import:bikes:analyze_csv[import.csv] # dumps all fields data
    # rake import:bikes:analyze_csv[import.csv,"date in"] # shows only single field
    task :analyze_csv, [:file, :field] => :environment do |t, args|
      file, field = args.values_at :file, :field
      usage = "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]"
      next unless csv_ready.call(file, usage)

      BikeCsvImporter.new(file).analyze field ? [field] : []
    end

    # Imports new brands from CSV file
    #
    # rake import:bikes:brands_csv[import.csv,dry] # dry run
    # rake import:bikes:brands_csv[import.csv] # live import
    task :brands_csv, [:file, :dry_run] => :environment do |t, args|
      file, dry_run_arg = args.values_at :file, :dry_run
      usage = "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]"
      next unless csv_ready.call(file, usage)

      dry_run = dry_run_mode.call(dry_run_arg, usage)
      next if dry_run.nil?

      BikeCsvImporter.new(file).brands dry_run
    end
  end
end
Loading…
Reference in new issue