mirror of https://github.com/fspc/BikeShed-1.git
Jason Denney
8 years ago
committed by
GitHub
9 changed files with 388 additions and 2 deletions
@ -0,0 +1,160 @@ |
|||||
|
require 'csv' |
||||
|
|
||||
|
# Imports data from a CSV file into the bikes database.
#
# The first CSV row is treated as the header. Every following row is turned
# into a Hash keyed by the stripped, down cased header names and handed to one
# of the public actions: #run, #analyze or #brands. All progress is printed to
# stdout.
class BikeCsvImporter
  include BikeCsvImporter::Cache
  include BikeCsvImporter::Cleaner
  include BikeCsvImporter::BikeAttrs
  include BikeCsvImporter::Logs

  attr_reader :file

  # Default constructor
  #
  # @param [String] file Path to the CSV file
  def initialize(file)
    @file = file
  end

  # Runs the import. Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def run(dry_run)
    imported_count = 0
    skipped_count = 0
    check_method = check_method_for dry_run

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import"

    fetch do |bike_hash|
      bike = new_bike bike_hash

      if bike.try check_method
        puts "Imported #{bike.shop_id}: #{bike}".green
        import_logs bike, bike_hash, check_method
        # A bike counts as imported even when some of its log entries fail.
        imported_count += 1
      else
        puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red
        skipped_count += 1
      end
    end

    puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV"
  end

  # Analyzes and prints out the input CSV file values
  #
  # @param [Array<String>] fields If passed, analyze only the given fields (names are down cased)
  def analyze(fields = [])
    puts "Analyzing CSV values frequency for #{fields.any? ? fields.join(', ') + ' field' : 'all fields'}"

    fields = fields.map(&:downcase)
    # field name => { value => number of occurrences }
    grouped = Hash.new { |by_field, field| by_field[field] = Hash.new(0) }

    fetch do |bike_hash|
      bike_hash.each do |key, value|
        next if fields.any? && !fields.include?(key)

        grouped[key][value] += 1
      end
    end

    grouped.each do |field, values|
      puts "#{field}:"
      values.each do |value, count|
        puts "\t#{value.inspect}: #{count}"
      end
      puts "\tTotal of #{values.count} distinct values"
    end
  end

  # Imports new brands from CSV file (field 'make'). Will print out progress to stdout
  #
  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
  def brands(dry_run)
    created_count = 0
    skipped_count = 0
    check_method = check_method_for dry_run

    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of brands import"

    fetch do |bike_hash|
      make = clean_value bike_hash['make']
      # Second argument asks for an unsaved brand when no existing one is found.
      brand = bike_attr_bike_brand make, true

      if brand.try :persisted?
        puts "Skipped already existing brand #{brand.brand}"
        skipped_count += 1
      elsif brand.try check_method
        puts "Created brand #{brand.brand}".green
        created_count += 1
      else
        puts "Skipped #{brand.try(:brand) || make}: #{brand.try(:errors).try(:full_messages).try(:join, '; ') || 'object not created'}".red
        skipped_count += 1
      end
    end

    puts "#{created_count} brand created, #{skipped_count} brand skipped, total of #{created_count + skipped_count} rows in the CSV"
  end

  private

  # Picks the method used to check a record: validation only for a dry run,
  # an actual save otherwise.
  #
  # @param [Boolean] dry_run
  #
  # @return [Symbol] :valid? or :save
  def check_method_for(dry_run)
    dry_run ? :valid? : :save
  end

  # Builds the log entries of an imported bike, checks each one with
  # +check_method+ and prints the outcome.
  #
  # @param [Bike] bike The already checked Bike instance
  # @param [Hash] bike_hash The input hash from a CSV row
  # @param [Symbol] check_method :valid? or :save, @see #check_method_for
  def import_logs(bike, bike_hash, check_method)
    new_logs_entries(bike, bike_hash).each do |log|
      if log.send check_method
        puts "\tLog entry created: #{log.inspect}".green
      else
        puts "\tLog entry creation failed: #{log.errors.full_messages.join '; '}".red
      end
    end
  end

  # Parses the CSV header & rows, yielding a block for each row (except the header)
  # Header is stripped and down cased!
  #
  # @yield [Hash] the row as a hash keyed by header names, @see #parse_bike
  def fetch
    CSV.foreach(file).each_with_index do |row, index|
      if index.zero?
        parse_header row
      else
        yield parse_bike(row)
      end
    end
  end

  # Parses & stores the input header, stripping and down casing by the way.
  # Empty header cells (nil) become empty strings instead of raising.
  #
  # @param [Array<String>] row
  def parse_header(row)
    @header = row.map { |name| name.to_s.strip.downcase }
  end

  # Parses the input row into a hash with keys from the header, @see #parse_header
  #
  # @param [Array<String>] row
  #
  # @return [Hash]
  def parse_bike(row)
    @header.zip(row).to_h
  end

  # Constructs a new Bike instance from the given hash from a CSV row
  #
  # @param [Hash] bike_hash
  #
  # @return [Bike]
  def new_bike(bike_hash)
    Bike.new bike_attrs(bike_hash)
  end

  # Constructs new Bike Log Entries instances from the given hash from a CSV row
  #
  # @param [Bike] bike The Bike instance to construct log entries for
  # @param [Hash] bike_hash The input hash from a CSV row
  #
  # @return [Array<ActsAsLoggable::Log>]
  def new_logs_entries(bike, bike_hash)
    %i[acquired comment gone].map { |kind| send :"log_entry_#{kind}", bike, bike_hash }.compact
  end
end
@ -0,0 +1,93 @@ |
|||||
|
# Helper module to create various Bike instance fields from a CSV row hash
#
# Expects the including class to also provide #clean_value and the
# #cached_bike_purpose / #cached_bike_brand / #cached_bike_model lookups.
class BikeCsvImporter
  module BikeAttrs
    # Purpose name => pattern matched against the down cased 'program' value.
    # The first matching entry wins. 'STORAGE' has no pattern and is therefore
    # never picked by the import.
    BIKE_PURPOSE_PATTERNS = {
      'SALE' => /shop|as(-|\s+)is|safety\s*check/,
      'BUILDBIKE' => /build|bikes.*world/,
      'PARTS' => /part|frame/,
      'SCRAP' => /scrap|strip/
    }.freeze

    # Purpose used when no pattern matches.
    DEFAULT_BIKE_PURPOSE = 'UNDETERMINED'.freeze

    # Maps Bike attribute names to the CSV column they are read from.
    # A nil column means the attribute gets a fixed default value.
    #
    # @return [Hash{Symbol => String, nil}]
    def bike_attr_fields
      {
        shop_id: 'velocipede number',
        bike_purpose_id: 'program',
        #gone: 'gone',
        value: 'price',
        bike_brand_id: 'make',
        bike_model_id: 'model',
        model: 'model',
        bike_style_id: nil,
        bike_condition_id: nil,
        seat_tube_height: nil,
        bike_wheel_size_id: nil,
        serial_number: nil
      }
    end

    # Builds the attributes hash for a new Bike by passing every cleaned CSV
    # value through its matching bike_attr_<attribute> method.
    #
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [Hash{Symbol => Object}]
    def bike_attrs(bike_hash)
      bike_attr_fields.each_with_object({}) do |(model_field, csv_field), attrs|
        attrs[model_field] = send(:"bike_attr_#{model_field}", clean_value(bike_hash[csv_field]))
      end
    end

    # @param [String, nil] value
    #
    # @return [Integer] 0 when the value is missing or not numeric
    def bike_attr_shop_id(value)
      value.to_i
    end

    # Finds the purpose whose pattern matches the value, falling back to
    # DEFAULT_BIKE_PURPOSE.
    #
    # @param [String, nil] value
    #
    # @return [Object] id of the record returned by #cached_bike_purpose
    def bike_attr_bike_purpose_id(value)
      description = value.to_s.downcase
      purpose, = BIKE_PURPOSE_PATTERNS.find { |_, pattern| pattern =~ description }

      cached_bike_purpose(purpose || DEFAULT_BIKE_PURPOSE).id
    end

    # @param [String, nil] value
    #
    # @return [Boolean] true only for 'yes', 'yeah' or 'y' (case insensitive)
    def bike_attr_gone(value)
      %w[yes yeah y].include? value.to_s.downcase
    end

    # Parses a price, ignoring dollar signs and thousands separators, so that
    # "$1,200" becomes 1200 rather than 1. Any fractional part is truncated.
    #
    # @param [String, nil] value
    #
    # @return [Integer, nil] nil when there is no value
    def bike_attr_value(value)
      return if value.nil?

      value.to_s.delete('$,').to_i
    end

    # Looks up the brand by name; a missing name or one starting with
    # 'unknown' is looked up as 'Unknown'.
    #
    # @param [String, nil] value
    # @param [Boolean] new_if_empty Passed through to #cached_bike_brand
    #
    # @return [Object, nil] whatever #cached_bike_brand returns
    def bike_attr_bike_brand(value, new_if_empty = false)
      value = 'Unknown' if !value || value =~ /\Aunknown/i
      cached_bike_brand value, new_if_empty
    end

    # @param [String, nil] value
    #
    # @return [Object, nil] id of the brand, nil when no brand was found
    def bike_attr_bike_brand_id(value)
      brand = bike_attr_bike_brand(value, false)
      brand && brand.id
    end

    # @param [String, nil] value
    #
    # @return [Object, nil] id of the model, nil when no value or no model was found
    def bike_attr_bike_model_id(value)
      return unless value

      model = cached_bike_model(value)
      model && model.id
    end

    # @param [String, nil] value
    #
    # @return [String, nil] the value, unless it is missing or contains 'unknown'
    def bike_attr_model(value)
      value if value && value !~ /unknown/i
    end

    # Not read from the CSV: always the id of the 'OTHER' style (looked up once).
    def bike_attr_bike_style_id(_)
      @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id
    end

    # Not read from the CSV: always the id of the 'UNDETERMINED' condition (looked up once).
    def bike_attr_bike_condition_id(_)
      @bike_condition_undetermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id
    end

    # Not read from the CSV: always 0.
    def bike_attr_seat_tube_height(_)
      0
    end

    # Not read from the CSV: always the id of the 'UNDETERMINED' wheel size (looked up once).
    def bike_attr_bike_wheel_size_id(_)
      @bike_wheel_size_undetermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id
    end

    # Not read from the CSV: always 'UNDETERMINED'.
    def bike_attr_serial_number(_)
      'UNDETERMINED'
    end
  end
end
@ -0,0 +1,35 @@ |
|||||
|
# Helper module to create various cached instances for bike CSV imports
#
# Every lookup is memoized per importer instance, including lookups that found
# nothing, so each distinct name is queried at most once.
class BikeCsvImporter
  module Cache
    # @param [String] purpose
    #
    # @return [BikePurpose, nil]
    def cached_bike_purpose(purpose)
      @bike_purpose_cache ||= {}
      # fetch (rather than ||=) so a nil result is cached as well
      @bike_purpose_cache.fetch(purpose) do
        @bike_purpose_cache[purpose] = BikePurpose.find_by_purpose(purpose)
      end
    end

    # Finds a brand by name, ignoring case.
    #
    # @param [String] brand
    # @param [Boolean] new_if_empty If true and no brand exists, returns (and caches) a new unsaved BikeBrand
    #
    # @return [BikeBrand, nil]
    def cached_bike_brand(brand, new_if_empty = false)
      @bike_brand_cache ||= {}
      # The query ignores case, so the cache key must too.
      key = brand.downcase

      unless @bike_brand_cache.key? key
        @bike_brand_cache[key] = BikeBrand.where('lower(brand) = ?', key).first
      end

      # A previously cached miss must not prevent building a new brand on request.
      @bike_brand_cache[key] ||= BikeBrand.new(brand: brand) if new_if_empty
      @bike_brand_cache[key]
    end

    # Finds a model by name, ignoring case.
    #
    # @param [String] model
    #
    # @return [BikeModel, nil]
    def cached_bike_model(model)
      @bike_model_cache ||= {}
      key = model.downcase

      @bike_model_cache.fetch(key) do
        @bike_model_cache[key] = BikeModel.where('lower(model) = ?', key).first
      end
    end

    # @param [String] action
    #
    # @return [ActsAsLoggable::BikeAction, nil]
    def cached_log_bike_action(action)
      @log_bike_action_id_cache ||= {}
      @log_bike_action_id_cache.fetch(action) do
        @log_bike_action_id_cache[action] = ActsAsLoggable::BikeAction.find_by_action(action)
      end
    end
  end
end
@ -0,0 +1,16 @@ |
|||||
|
# Helper module to clean the incoming data from CSV fields
class BikeCsvImporter
  module Cleaner
    # Down cased values that mean "no data" and are turned into nil.
    EMPTY_MARKERS = ['?', 'n/a', 'missing', 'unknown', ''].freeze

    # Strips the value and turns "no data" markers into nil.
    #
    # @param [String, nil] value Raw CSV field
    #
    # @return [String, nil]
    def clean_value(value)
      value_or_nil strip_value(value)
    end

    # Removes surrounding whitespace and every line break inside the value.
    #
    # @param [String, nil] value
    #
    # @return [String, nil] nil when the value cannot be stripped (e.g. is nil)
    def strip_value(value)
      return unless value.respond_to? :strip

      value.strip.delete("\r\n")
    end

    # @param [String, nil] value
    #
    # @return [String, nil] nil when the value is one of EMPTY_MARKERS (case insensitive), the value itself otherwise
    def value_or_nil(value)
      marker = value.downcase if value.respond_to? :downcase
      value unless EMPTY_MARKERS.include? marker
    end
  end
end
@ -0,0 +1,42 @@ |
|||||
|
# Helper module to create ActsAsLoggable log entries for a Bike instance from a CSV row hash
#
# Expects the including class to also provide #clean_value and
# #cached_log_bike_action.
class BikeCsvImporter
  module Logs
    # Builds a 'COMPLETED' entry when the 'gone' column starts with "y"
    # (y, yes, yeah, ...). Values that merely contain a "y", such as "maybe"
    # or "not yet", are not treated as gone.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil]
    def log_entry_gone(bike, bike_hash)
      return unless clean_value(bike_hash['gone']).to_s =~ /\Ay/i

      log_entry bike, log_entry_date(clean_value(bike_hash['date out'])), 'COMPLETED', 'Gone'
    end

    # Builds an 'ACQUIRED' entry when the 'date in' column has a value.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil]
    def log_entry_acquired(bike, bike_hash)
      date_in = clean_value(bike_hash['date in'])
      return unless date_in

      log_entry bike, log_entry_date(date_in), 'ACQUIRED'
    end

    # Builds a 'NOTE' entry when the 'comment' column has a value.
    #
    # @param [Bike] bike
    # @param [Hash] bike_hash The input hash from a CSV row
    #
    # @return [ActsAsLoggable::Log, nil]
    def log_entry_comment(bike, bike_hash)
      comment = clean_value(bike_hash['comment'])
      return unless comment.present?

      log_entry bike, nil, 'NOTE', comment
    end

    # Parses a month/day/two-digit-year date.
    #
    # @param [String, nil] value
    #
    # @return [Date, nil] nil when there is no value or it cannot be parsed
    def log_entry_date(value)
      return unless value

      Date.strptime value, '%m/%d/%y'
    rescue ArgumentError, TypeError
      # Unparsable dates are treated as missing; #log_entry then uses the current time.
      nil
    end

    # Builds (does not save) a log entry for the bike.
    #
    # @param [Bike] bike
    # @param [Date, nil] date Used as both start and end date; defaults to the current time
    # @param [String] type Name of the bike action, looked up via #cached_log_bike_action
    # @param [String, nil] description
    #
    # @return [ActsAsLoggable::Log]
    def log_entry(bike, date, type, description = nil)
      date ||= DateTime.now
      bike_action = cached_log_bike_action(type)

      ActsAsLoggable::Log.new(
        loggable_type: bike.class.to_s,
        loggable_id: bike.id || bike.shop_id.to_i, # for dry run
        log_action_type: bike_action.class.to_s,
        log_action_id: bike_action.id,
        start_date: date,
        end_date: date,
        description: description
      )
    end
  end
end
@ -0,0 +1,36 @@ |
|||||
|
namespace :import do
  namespace :bikes do
    # Shared argument check for the tasks below: prints the usage line when no
    # file was given, or an error when the file cannot be read.
    #
    # Returns true only when the file can be imported.
    importable = lambda do |file, usage|
      if !file
        puts "Usage: rake #{usage}"
        false
      elsif !File.readable?(file)
        puts "File #{file} does not exist or is unreachable"
        false
      else
        true
      end
    end

    # Imports bikes info from CSV file
    #
    # rake import:bikes:csv[import.csv,dry] # dry run
    # rake import:bikes:csv[import.csv] # live import
    desc 'Imports bikes info from CSV file (pass "dry" as the second argument for a dry run)'
    task :csv, [:file, :dry_run] => :environment do |t, args|
      file, dry_run = args.values_at :file, :dry_run
      next unless importable.call(file, "#{t.name}[$csv_file_path[,$dry_run=dry]]")

      BikeCsvImporter.new(file).run dry_run == 'dry'
    end

    # Analyze a single field from CSV file
    #
    # rake import:bikes:analyze_csv[import.csv] # dumps all fields data
    # rake import:bikes:analyze_csv[import.csv,"date in"] # shows only single field
    desc 'Prints the value frequencies of all fields, or of a single field, of a CSV file'
    task :analyze_csv, [:file, :field] => :environment do |t, args|
      file, field = args.values_at :file, :field
      next unless importable.call(file, "#{t.name}[$csv_file_path[,\"$field_name\"]]")

      BikeCsvImporter.new(file).analyze field ? [field] : []
    end

    # Imports new brands from CSV file
    #
    # rake import:bikes:brands_csv[import.csv,dry] # dry run
    # rake import:bikes:brands_csv[import.csv] # live import
    desc 'Imports new brands from CSV file (pass "dry" as the second argument for a dry run)'
    task :brands_csv, [:file, :dry_run] => :environment do |t, args|
      file, dry_run = args.values_at :file, :dry_run
      next unless importable.call(file, "#{t.name}[$csv_file_path[,$dry_run=dry]]")

      BikeCsvImporter.new(file).brands dry_run == 'dry'
    end
  end
end
Loading…
Reference in new issue