diff --git a/Gemfile b/Gemfile
index 85b6137..ff271e5 100644
--- a/Gemfile
+++ b/Gemfile
@@ -35,6 +35,7 @@ group :development, :test do
   gem 'factory_girl_rails', '~> 1.2'
   gem 'pry', '~> 0.9.8'
   gem 'faker', '~> 1.2.0'
+  gem 'colorize'
 end
 
 group :test do
diff --git a/Gemfile.lock b/Gemfile.lock
index 3afc08d..8f14861 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -69,6 +69,7 @@ GEM
       coffee-script-source
       execjs
     coffee-script-source (1.10.0)
+    colorize (0.8.1)
     database_cleaner (1.2.0)
     decent_exposure (1.0.2)
     devise (2.0.6)
@@ -255,6 +256,7 @@ DEPENDENCIES
   cancan
   capybara (~> 2.2.1)
   coffee-rails (~> 3.2.1)
+  colorize
   database_cleaner (~> 1.2.0)
   decent_exposure (~> 1.0.1)
   devise (~> 2.0.4)
diff --git a/app/models/bike.rb b/app/models/bike.rb
index ef6b1ba..5135938 100644
--- a/app/models/bike.rb
+++ b/app/models/bike.rb
@@ -1,13 +1,14 @@
 class Bike < ActiveRecord::Base
   acts_as_loggable
-  attr_accessible :shop_id, :serial_number, :bike_brand_id, :model, :color, :bike_style_id, :seat_tube_height,
-    :top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo
+  attr_accessible :shop_id, :serial_number, :bike_brand_id, :bike_model_id, :model, :color, :bike_style_id,
+    :seat_tube_height, :top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo
 
   has_many :transactions
   has_one :owner, :class_name => 'User'
   has_one :task_list, :as => :item, :dependent => :destroy
 
   belongs_to :bike_brand
+  belongs_to :bike_model
   belongs_to :bike_style
   belongs_to :bike_condition
   belongs_to :bike_purpose
diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb
new file mode 100644
index 0000000..5b7c157
--- /dev/null
+++ b/app/models/bike_csv_importer.rb
@@ -0,0 +1,161 @@
+require 'csv'
+
+# Imports data from CSV file into the bikes database.
+class BikeCsvImporter
+
+  include BikeCsvImporter::Cache
+  include BikeCsvImporter::Cleaner
+  include BikeCsvImporter::BikeAttrs
+  include BikeCsvImporter::Logs
+
+  attr_reader :file
+
+  # Default constructor
+  #
+  # @param [String] file Path to the CSV file
+  def initialize(file)
+    @file = file
+  end
+
+  # Runs the import. Will print out progress to stdout
+  #
+  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
+  def run(dry_run)
+    imported_count, skipped_count = 0, 0
+
+    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import"
+
+    fetch do |bike_hash|
+      bike = new_bike bike_hash
+      check_method = dry_run ? :valid? : :save
+
+      if bike.try check_method
+        puts "Imported #{bike.shop_id}: #{bike}".green
+
+        logs = new_logs_entries bike, bike_hash
+        logs.each do |log|
+          if log.send check_method
+            puts "\tLog entry created: #{log.inspect}".green
+          else
+            puts "\tLog entry creation failed: #{log.errors.full_messages.join '; '}".red
+          end
+        end
+
+        imported_count += 1
+      else
+        puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red
+        skipped_count += 1
+      end
+    end
+
+    puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV"
+  end
+
+  # Analyzes and prints out the input CSV file values
+  #
+  # @param [Array] fields If passed, analyze only the given fields (names are down cased)
+  def analyze(fields = [])
+    puts "Analyzing CSV values frequency for #{fields.any? ? fields.join(', ') + ' field' : 'all fields'}"
+
+    fields = fields.map &:downcase
+    grouped = {}
+    fetch do |bike_hash|
+      bike_hash.each do |key, value|
+        next if fields.any? && !fields.include?(key)
+        grouped[key] ||= {}
+        grouped[key][value] ||= 0
+        grouped[key][value] += 1
+      end
+    end
+
+    grouped.each do |field, values|
+      puts "#{field}:"
+      values.each do |value, count|
+        puts "\t#{value.inspect}: #{count}"
+      end
+      puts "\tTotal of #{values.count} distinct values"
+    end
+  end
+
+  # Imports new brands from CSV file (field 'make'). Will print out progress to stdout
+  #
+  # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation
+  def brands(dry_run)
+    created_count, skipped_count = 0, 0
+
+    puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of brands import"
+
+    fetch do |bike_hash|
+      make = clean_value bike_hash['make']
+      brand = bike_attr_bike_brand make, true
+      check_method = dry_run ? :valid? : :save
+
+      if brand.try :persisted?
+        puts "Skipped already existing brand #{brand.brand}"
+        skipped_count += 1
+      elsif brand.try check_method
+        puts "Created brand #{brand.brand}".green
+        created_count += 1
+      else
+        puts "Skipped #{brand.try(:brand) || make}: #{brand.try(:errors).try(:full_messages).try(:join, '; ') || 'object not created'}".red
+        skipped_count += 1
+      end
+    end
+
+    puts "#{created_count} brand created, #{skipped_count} brand skipped, total of #{created_count + skipped_count} rows in the CSV"
+  end
+
+
+
+  private
+
+  # Parses the CSV header & rows, yielding a block for each row (except the header)
+  # Header is down cased!
+  #
+  # @param [Proc] &block The block to yield to
+  def fetch
+    CSV.foreach(file).each_with_index do |row, i|
+      if i.zero?
+        parse_header row
+      else
+        yield parse_bike(row)
+      end
+    end
+  end
+
+  # Parses & stores the input header, stripping and down casing the names.
+  # An empty header cell is read by CSV as nil and is stored as ''.
+  #
+  # @param [Array] row
+  def parse_header(row)
+    @header = row.map { |name| name.to_s.strip.downcase }
+  end
+
+  # Parses the input row into a hash with keys from the header, @see #parse_header
+  #
+  # @param [Array] row
+  #
+  # @return [Hash]
+  def parse_bike(row)
+    @header.zip(row).to_h
+  end
+
+  # Constructs a new Bike instance from the given hash from a CSV row
+  #
+  # @param [Hash] bike_hash
+  #
+  # @return [Bike]
+  def new_bike(bike_hash)
+    Bike.new bike_attrs(bike_hash)
+  end
+
+  # Constructs new Bike Log Entries instances from the given hash from a CSV row
+  #
+  # @param [Bike] bike The Bike instance to construct log entries for
+  # @param [Hash] bike_hash The input hash from a CSV row
+  #
+  # @return [Array]
+  def new_logs_entries(bike, bike_hash)
+    %i{ acquired comment gone }.map { |x| send :"log_entry_#{x}", bike, bike_hash }.compact
+  end
+end
diff --git a/app/models/bike_csv_importer/bike_attrs.rb b/app/models/bike_csv_importer/bike_attrs.rb
new file mode 100644
index 0000000..a478406
--- /dev/null
+++ b/app/models/bike_csv_importer/bike_attrs.rb
@@ -0,0 +1,142 @@
+# Helper module to create various Bike instance fields from a CSV row hash
+class BikeCsvImporter
+  module BikeAttrs
+    # Maps Bike attributes to the (down cased) CSV column each one is read from.
+    # Attributes mapped to nil are not read from the CSV: their bike_attr_* method
+    # ignores its argument and returns a fixed value.
+    #
+    # @return [Hash]
+    def bike_attr_fields
+      {
+        shop_id:            'velocipede number',
+        bike_purpose_id:    'program',
+        #gone:               'gone',
+        value:              'price',
+        bike_brand_id:      'make',
+        bike_model_id:      'model',
+        model:              'model',
+        bike_style_id:      nil,
+        bike_condition_id:  nil,
+        seat_tube_height:   nil,
+        bike_wheel_size_id: nil,
+        serial_number:      nil,
+      }
+    end
+
+    # Builds the attributes for Bike.new by passing the cleaned CSV value of every
+    # mapped column to the matching bike_attr_<field> method.
+    #
+    # @param [Hash] bike_hash The input hash from a CSV row
+    #
+    # @return [Hash]
+    def bike_attrs(bike_hash)
+      bike_attr_fields.each_with_object({}) do |(model_field, csv_field), memo|
+        memo[model_field] = send :"bike_attr_#{model_field}", clean_value(bike_hash[csv_field])
+      end
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [Integer] 0 when the value is missing or not numeric
+    def bike_attr_shop_id(value)
+      value.to_i
+    end
+
+    # Picks the bike purpose whose pattern first matches the down cased value,
+    # falling back to 'UNDETERMINED'.
+    #
+    # @param [String, nil] value
+    #
+    # @return [Integer] id of the matching BikePurpose record
+    def bike_attr_bike_purpose_id(value)
+      map = {
+        'SALE' => /shop|as(-|\s+)is|safety\s*check/,
+        'BUILDBIKE' => /build|bikes.*world/,
+        'STORAGE' => nil, # no pattern, so it is never picked here
+        'PARTS' => /part|frame/,
+        'SCRAP' => /scrap|strip/,
+      }
+
+      default = 'UNDETERMINED'
+      test_value = value.try :downcase
+      value = map.find { |_, regexp| regexp.try :match, test_value }.try :first
+
+      cached_bike_purpose(value || default).id
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [Boolean] true for 'yes', 'yeah' or 'y' in any letter case
+    def bike_attr_gone(value)
+      %w{ yes yeah y }.include? value.try :downcase
+    end
+
+    # Converts a price such as "$1,200" to an integer. Both the dollar sign and the
+    # thousands separators are dropped, otherwise to_i would stop at the comma.
+    #
+    # @param [String, nil] value
+    #
+    # @return [Integer, nil] nil when no value is given
+    def bike_attr_value(value)
+      value.try(:gsub, /[$,]/, '').try :to_i
+    end
+
+    # Looks up the brand, treating a missing or "unknown..." value as 'Unknown'.
+    #
+    # @param [String, nil] value
+    # @param [Boolean] new_if_empty If true, builds an unsaved brand when none is found
+    #
+    # @return [BikeBrand, nil]
+    def bike_attr_bike_brand(value, new_if_empty = false)
+      value = 'Unknown' if !value || value =~ /\Aunknown/i
+      cached_bike_brand value, new_if_empty
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [Integer, nil] id of an existing brand, nil when there is none
+    def bike_attr_bike_brand_id(value)
+      bike_attr_bike_brand(value, false).try :id
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [Integer, nil] id of an existing model, nil when there is none
+    def bike_attr_bike_model_id(value)
+      return unless value
+      cached_bike_model(value).try :id
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [String, nil] the value, unless it is missing or contains "unknown"
+    def bike_attr_model(value)
+      value if value && value !~ /unknown/i
+    end
+
+    # @return [Integer] id of the 'OTHER' bike style (looked up once)
+    def bike_attr_bike_style_id(_)
+      @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id
+    end
+
+    # @return [Integer] id of the 'UNDETERMINED' bike condition (looked up once)
+    def bike_attr_bike_condition_id(_)
+      @bike_condition_undetermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id
+    end
+
+    # @return [Integer] always 0 (not read from the CSV)
+    def bike_attr_seat_tube_height(_)
+      0
+    end
+
+    # @return [Integer] id of the 'UNDETERMINED' wheel size (looked up once)
+    def bike_attr_bike_wheel_size_id(_)
+      @bike_wheel_size_undetermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id
+    end
+
+    # @return [String] always 'UNDETERMINED' (not read from the CSV)
+    def bike_attr_serial_number(_)
+      'UNDETERMINED'
+    end
+  end
+end
diff --git a/app/models/bike_csv_importer/cache.rb b/app/models/bike_csv_importer/cache.rb
new file mode 100644
index 0000000..6a0e3a7
--- /dev/null
+++ b/app/models/bike_csv_importer/cache.rb
@@ -0,0 +1,52 @@
+# Helper module to create various cached instances for bike CSV imports
+class BikeCsvImporter
+  module Cache
+    # @param [String] purpose
+    #
+    # @return [BikePurpose, nil]
+    def cached_bike_purpose(purpose)
+      @bike_purpose_cache ||= {}
+      @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose
+    end
+
+    # Finds a brand by name, ignoring letter case. The cache is keyed by the down
+    # cased name as well, so "Trek" and "TREK" resolve to the same entry, matching
+    # the case-insensitive lookup. Misses are cached too.
+    #
+    # @param [String] brand
+    # @param [Boolean] new_if_empty If true, builds an unsaved brand when none is found
+    #
+    # @return [BikeBrand, nil]
+    def cached_bike_brand(brand, new_if_empty = false)
+      @bike_brand_cache ||= {}
+      key = brand.downcase
+      return @bike_brand_cache[key] if @bike_brand_cache.key? key
+
+      bike_brand = BikeBrand.where('lower(brand) = ?', key).first
+      bike_brand ||= BikeBrand.new(brand: brand) if new_if_empty
+
+      @bike_brand_cache[key] = bike_brand
+    end
+
+    # Finds a model by name, ignoring letter case; misses are cached too.
+    #
+    # @param [String] model
+    #
+    # @return [BikeModel, nil]
+    def cached_bike_model(model)
+      @bike_model_cache ||= {}
+      key = model.downcase
+      return @bike_model_cache[key] if @bike_model_cache.key? key
+
+      @bike_model_cache[key] = BikeModel.where('lower(model) = ?', key).first
+    end
+
+    # @param [String] action
+    #
+    # @return [ActsAsLoggable::BikeAction, nil]
+    def cached_log_bike_action(action)
+      @log_bike_action_id_cache ||= {}
+      @log_bike_action_id_cache[action] ||= ActsAsLoggable::BikeAction.find_by_action(action)
+    end
+  end
+end
diff --git a/app/models/bike_csv_importer/cleaner.rb b/app/models/bike_csv_importer/cleaner.rb
new file mode 100644
index 0000000..65794e7
--- /dev/null
+++ b/app/models/bike_csv_importer/cleaner.rb
@@ -0,0 +1,32 @@
+# Helper module to clean the incoming data from CSV fields
+class BikeCsvImporter
+  module Cleaner
+    # Values (compared down cased) that are treated as "no data"
+    EMPTY_VALUES = ['?', 'n/a', 'missing', 'unknown', ''].freeze
+
+    # Strips the value and turns the placeholders from EMPTY_VALUES into nil
+    #
+    # @param [String, nil] value
+    #
+    # @return [String, nil]
+    def clean_value(value)
+      value_or_nil strip_value(value)
+    end
+
+    # Removes the surrounding whitespace and any remaining line breaks
+    #
+    # @param [String, nil] value
+    #
+    # @return [String, nil]
+    def strip_value(value)
+      value.try(:strip).try(:gsub, /\n|\r/, '')
+    end
+
+    # @param [String, nil] value
+    #
+    # @return [String, nil] nil when the value is one of EMPTY_VALUES
+    def value_or_nil(value)
+      value unless EMPTY_VALUES.include? value.try(:downcase)
+    end
+  end
+end
diff --git a/app/models/bike_csv_importer/logs.rb b/app/models/bike_csv_importer/logs.rb
new file mode 100644
index 0000000..cbc7d92
--- /dev/null
+++ b/app/models/bike_csv_importer/logs.rb
@@ -0,0 +1,76 @@
+# Helper module to create ActsAsLoggable log entries for a Bike instance from a CSV row hash
+class BikeCsvImporter
+  module Logs
+    # Builds a 'COMPLETED' entry when the 'gone' column starts with "y" (y, yes,
+    # yeah). The match is anchored so that values merely containing a "y", such
+    # as "not yet", do not mark the bike as gone.
+    #
+    # @param [Bike] bike
+    # @param [Hash] bike_hash The input hash from a CSV row
+    #
+    # @return [ActsAsLoggable::Log, nil]
+    def log_entry_gone(bike, bike_hash)
+      if clean_value(bike_hash['gone']).to_s =~ /\Ay/i
+        log_entry bike, log_entry_date(clean_value(bike_hash['date out'])), 'COMPLETED', 'Gone'
+      end
+    end
+
+    # Builds an 'ACQUIRED' entry when the 'date in' column has a value
+    #
+    # @param [Bike] bike
+    # @param [Hash] bike_hash The input hash from a CSV row
+    #
+    # @return [ActsAsLoggable::Log, nil]
+    def log_entry_acquired(bike, bike_hash)
+      date_in = clean_value bike_hash['date in']
+      log_entry bike, log_entry_date(date_in), 'ACQUIRED' if date_in
+    end
+
+    # Builds a 'NOTE' entry when the 'comment' column has a value
+    #
+    # @param [Bike] bike
+    # @param [Hash] bike_hash The input hash from a CSV row
+    #
+    # @return [ActsAsLoggable::Log, nil]
+    def log_entry_comment(bike, bike_hash)
+      comment = clean_value bike_hash['comment']
+      log_entry bike, nil, 'NOTE', comment if comment.present?
+    end
+
+    # Parses a date in the month/day/two-digit-year format
+    #
+    # @param [String, nil] value
+    #
+    # @return [Date, nil] nil when the value is missing or is not a valid date
+    def log_entry_date(value)
+      return unless value
+      Date.strptime value, '%m/%d/%y'
+    rescue ArgumentError
+      # Only an unparsable date is expected here; anything else should surface
+      nil
+    end
+
+    # Builds (without saving) a log entry for the bike
+    #
+    # @param [Bike] bike
+    # @param [Date, nil] date Used as both start and end date, defaults to now
+    # @param [String] type Name of the bike action to log
+    # @param [String, nil] description
+    #
+    # @return [ActsAsLoggable::Log]
+    def log_entry(bike, date, type, description = nil)
+      date ||= DateTime.now
+      bike_action = cached_log_bike_action(type)
+
+      ActsAsLoggable::Log.new(
+        loggable_type: bike.class.to_s,
+        loggable_id: bike.id || bike.shop_id.to_i, # for dry run
+        log_action_type: bike_action.class.to_s,
+        log_action_id: bike_action.id,
+        start_date: date,
+        end_date: date,
+        description: description,
+      )
+    end
+  end
+end
diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake
new file mode 100644
index 0000000..43367f3
--- /dev/null
+++ b/lib/tasks/import.rake
@@ -0,0 +1,36 @@
+namespace :import do
+  namespace :bikes do
+    # Imports bikes info from CSV file
+    #
+    #   rake import:bikes:csv[import.csv,dry] # dry run
+    #   rake import:bikes:csv[import.csv]     # live import
+    task :csv, [:file, :dry_run] => :environment do |t, args|
+      file, dry_run = args.values_at :file, :dry_run
+      next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file
+      next puts "File #{file} does not exist or is unreachable" unless File.readable? file
+      BikeCsvImporter.new(file).run dry_run == 'dry'
+    end
+
+    # Analyze a single field from CSV file
+    #
+    #   rake import:bikes:analyze_csv[import.csv]           # dumps all fields data
+    #   rake import:bikes:analyze_csv[import.csv,"date in"] # shows only single field
+    task :analyze_csv, [:file, :field] => :environment do |t, args|
+      file, field = args.values_at :file, :field
+      next puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]" unless file
+      next puts "File #{file} does not exist or is unreachable" unless File.readable? file
+      BikeCsvImporter.new(file).analyze field ? [field] : []
+    end
+
+    # Imports new brands from CSV file
+    #
+    #   rake import:bikes:brands_csv[import.csv,dry] # dry run
+    #   rake import:bikes:brands_csv[import.csv]     # live import
+    task :brands_csv, [:file, :dry_run] => :environment do |t, args|
+      file, dry_run = args.values_at :file, :dry_run
+      next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file
+      next puts "File #{file} does not exist or is unreachable" unless File.readable? file
+      BikeCsvImporter.new(file).brands dry_run == 'dry'
+    end
+  end
+end