From 904b824c8bdbcd60d543db6fcaac069cfdb67c3e Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Wed, 15 Mar 2017 15:17:40 +0600 Subject: [PATCH 01/17] Create rake task (import:bikes:csv, import:bikes:analyze_csv) & importing class --- app/models/bikes_csv_importer.rb | 67 ++++++++++++++++++++++++++++++++ lib/tasks/import.rake | 19 +++++++++ 2 files changed, 86 insertions(+) create mode 100644 app/models/bikes_csv_importer.rb create mode 100644 lib/tasks/import.rake diff --git a/app/models/bikes_csv_importer.rb b/app/models/bikes_csv_importer.rb new file mode 100644 index 0000000..9c957ee --- /dev/null +++ b/app/models/bikes_csv_importer.rb @@ -0,0 +1,67 @@ +require 'csv' + +class BikesCsvImporter + attr_reader :file + + def initialize(file) + @file = file + end + + def run + fetch do |bike_hash| + import_bike bike_hash + end + end + + def analyze(fields = []) + fields = fields.map &:downcase + grouped = {} + fetch do |bike_hash| + bike_hash.each do |key, value| + next if fields.any? && !fields.include?(key.downcase) + grouped[key] ||= {} + grouped[key][value] ||= 0 + grouped[key][value] += 1 + end + end + grouped + end + + + + private + + def fetch + CSV.foreach(file).each_with_index do |row, i| + if i.zero? + parse_header row + else + yield parse_bike(row) + end + end + end + + def parse_header(row) + @header = row + end + + def parse_bike(row) + @header.zip(row).to_h + end + + def import_bike(bike_hash) + # TODO + end + + def clean_value(value) + value_or_nil strip_value(value) + end + + def strip_value(value) + value.try(:strip).try(:gsub, /\n|\r/, '') + end + + def value_or_nil(value) + return value unless ['?', 'n/a', 'missing', ''].include? 
value.try(:downcase) + end +end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 0000000..760e9e6 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,19 @@ +namespace :import do + namespace :bikes do + # Imports bikes info from CSV file + task :csv, [:file] => :environment do |t, args| + file = args[:file] + next puts "Usage: rake #{t.name}[$csv_file_path]" unless file + next puts "File #{file} does not exist or is unreachable" unless File.readable? file + pp BikesCsvImporter.new(file).run + end + + # Analyze a single field from CSV file + task :analyze_csv, [:file, :field] => :environment do |t, args| + file, field = args.values_at :file, :field + next puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]" unless file + next puts "File #{file} does not exist or is unreachable" unless File.readable? file + pp BikesCsvImporter.new(file).analyze field ? [field] : [] + end + end +end From 5564a1931f62fac4ec477917610d0bdac90e05e9 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Thu, 16 Mar 2017 15:17:14 +0600 Subject: [PATCH 02/17] BikeCsvImporter: import bikes, first step (main code, shop_id) --- app/models/bike_csv_importer.rb | 113 +++++++++++++++++++++++++++++++ app/models/bikes_csv_importer.rb | 67 ------------------ lib/tasks/import.rake | 4 +- 3 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 app/models/bike_csv_importer.rb delete mode 100644 app/models/bikes_csv_importer.rb diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb new file mode 100644 index 0000000..53fa7ca --- /dev/null +++ b/app/models/bike_csv_importer.rb @@ -0,0 +1,113 @@ +require 'csv' + +class BikeCsvImporter + attr_reader :file + + def initialize(file) + @file = file + end + + def run + result = {imported_count: 0, skipped_shop_ids: []} + fetch do |bike_hash| + bike = import_bike bike_hash + if bike.try :persisted? 
+ result[:imported_count] += 1 + else + result[:skipped_shop_ids].push bike.try(:shop_id) || bike_hash.values.first + end + end + result + end + + def analyze(fields = []) + fields = fields.map &:downcase + grouped = {} + fetch do |bike_hash| + bike_hash.each do |key, value| + next if fields.any? && !fields.include?(key) + grouped[key] ||= {} + grouped[key][value] ||= 0 + grouped[key][value] += 1 + end + end + grouped + end + + + + private + + def fetch + CSV.foreach(file).each_with_index do |row, i| + if i.zero? + parse_header row + else + yield parse_bike(row) + end + end + end + + def parse_header(row) + @header = row.map(&:downcase) + end + + def parse_bike(row) + @header.zip(row).to_h + end + + # + Velocipede Number -> Bikes.shop_id + # Program -> Bikes.bike_purpose_id + # + # Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: + # id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at + # 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 + # See https://github.com/spacemunkay/BikeShed/blob/master/app/components/bike_logs.rb#L12-L18 for example. Use user_id 1 for current_user_id (1 should be the admin ID I think). Use "Date Out" column for start_date & end_date. Set action_id to "COMPLETED". + # + # Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" + # Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. + # Price -> Bikes.value + # Make -> Bikes.bike_brand_id + # Model -> Bikes.bike_model_id + # to Whom -> ignore + # Zip Code -> ignore + # Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. 
+ # + # Data at the end of the CSV seems to be missing a lot of fields. If any field is empty, the the value can be "UNDETERMINED" if applicable, or ignored. Any other dates beside "Date In/Out" can be current date. + # + # I realize the log entry stuff is likely complicated and time consuming. At a minimum, the most important columns to import are the following: Velocipede Number, Program, Gone, Make, Model. To avoid creating the log entry for "Gone", we would instead just set 'gone' to true. + def import_bike(bike_hash) + bike = Bike.new bike_attrs(bike_hash) + bike.save + end + + def bike_attrs(bike_hash) + %i{ shop_id bike_purpose gone }.each_with_object({}) do |field, memo| + memo[field] = send :"bike_attr_#{ field }", bike_hash + end + end + + def bike_attr_shop_id(bike_hash) + bike_hash['velocipede number'].to_i + end + + def bike_attr_bike_purpose(bike_hash) + # TODO bike_hash['program'] + end + + def bike_attr_gone(bike_hash) + %w{ yes yeah y }.include? clean_value(bike_hash['gone']).try :downcase + end + + def clean_value(value) + value_or_nil strip_value(value) + end + + def strip_value(value) + value.try(:strip).try(:gsub, /\n|\r/, '') + end + + def value_or_nil(value) + return value unless ['?', 'n/a', 'missing', 'unknown', ''].include? value.try(:downcase) + end +end diff --git a/app/models/bikes_csv_importer.rb b/app/models/bikes_csv_importer.rb deleted file mode 100644 index 9c957ee..0000000 --- a/app/models/bikes_csv_importer.rb +++ /dev/null @@ -1,67 +0,0 @@ -require 'csv' - -class BikesCsvImporter - attr_reader :file - - def initialize(file) - @file = file - end - - def run - fetch do |bike_hash| - import_bike bike_hash - end - end - - def analyze(fields = []) - fields = fields.map &:downcase - grouped = {} - fetch do |bike_hash| - bike_hash.each do |key, value| - next if fields.any? 
&& !fields.include?(key.downcase) - grouped[key] ||= {} - grouped[key][value] ||= 0 - grouped[key][value] += 1 - end - end - grouped - end - - - - private - - def fetch - CSV.foreach(file).each_with_index do |row, i| - if i.zero? - parse_header row - else - yield parse_bike(row) - end - end - end - - def parse_header(row) - @header = row - end - - def parse_bike(row) - @header.zip(row).to_h - end - - def import_bike(bike_hash) - # TODO - end - - def clean_value(value) - value_or_nil strip_value(value) - end - - def strip_value(value) - value.try(:strip).try(:gsub, /\n|\r/, '') - end - - def value_or_nil(value) - return value unless ['?', 'n/a', 'missing', ''].include? value.try(:downcase) - end -end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 760e9e6..78b0425 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -5,7 +5,7 @@ namespace :import do file = args[:file] next puts "Usage: rake #{t.name}[$csv_file_path]" unless file next puts "File #{file} does not exist or is unreachable" unless File.readable? file - pp BikesCsvImporter.new(file).run + pp BikeCsvImporter.new(file).run end # Analyze a single field from CSV file @@ -13,7 +13,7 @@ namespace :import do file, field = args.values_at :file, :field next puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]" unless file next puts "File #{file} does not exist or is unreachable" unless File.readable? file - pp BikesCsvImporter.new(file).analyze field ? [field] : [] + pp BikeCsvImporter.new(file).analyze field ? 
[field] : [] end end end From c79ec5768499c69ccf120119ee8f053000beab5e Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Fri, 17 Mar 2017 15:15:23 +0600 Subject: [PATCH 03/17] BikeCsvImporter: add "purpose_id", temporarily remove "gone" --- app/models/bike_csv_importer.rb | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index 53fa7ca..cf5c3b4 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -78,11 +78,11 @@ class BikeCsvImporter # I realize the log entry stuff is likely complicated and time consuming. At a minimum, the most important columns to import are the following: Velocipede Number, Program, Gone, Make, Model. To avoid creating the log entry for "Gone", we would instead just set 'gone' to true. def import_bike(bike_hash) bike = Bike.new bike_attrs(bike_hash) - bike.save + #bike.save end def bike_attrs(bike_hash) - %i{ shop_id bike_purpose gone }.each_with_object({}) do |field, memo| + %i{ shop_id bike_purpose_id }.each_with_object({}) do |field, memo| memo[field] = send :"bike_attr_#{ field }", bike_hash end end @@ -91,8 +91,20 @@ class BikeCsvImporter bike_hash['velocipede number'].to_i end - def bike_attr_bike_purpose(bike_hash) - # TODO bike_hash['program'] + def bike_attr_bike_purpose_id(bike_hash) + map = { + 'SALE' => /shop|as(-|\s+)is|safety\s*check/, + 'BUILDBIKE' => /build|bikes.*world/, + 'STORAGE' => nil, + 'PARTS' => /part|frame/, + 'SCRAP' => /scrap|strip/, + } + + default = 'UNDETERMINED' + test_value = clean_value(bike_hash['program']).try :downcase + value = map.find { |_, regexp| regexp.try :match, test_value }.try :first + + cached_bike_purpose(value || default).id end def bike_attr_gone(bike_hash) @@ -110,4 +122,9 @@ class BikeCsvImporter def value_or_nil(value) return value unless ['?', 'n/a', 'missing', 'unknown', ''].include? 
value.try(:downcase) end + + def cached_bike_purpose(purpose) + @bike_purpose_cache ||= {} + @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose + end end From ae8b5cccd03830c84e11d6f5e206a049d73dfdf5 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Fri, 17 Mar 2017 15:22:37 +0600 Subject: [PATCH 04/17] BikeCsvImporter: add "value" --- app/models/bike_csv_importer.rb | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index cf5c3b4..dc22774 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -57,20 +57,20 @@ class BikeCsvImporter end # + Velocipede Number -> Bikes.shop_id - # Program -> Bikes.bike_purpose_id + # + Program -> Bikes.bike_purpose_id # # Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: - # id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at + # id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at # 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 # See https://github.com/spacemunkay/BikeShed/blob/master/app/components/bike_logs.rb#L12-L18 for example. Use user_id 1 for current_user_id (1 should be the admin ID I think). Use "Date Out" column for start_date & end_date. Set action_id to "COMPLETED". # # Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" # Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. 
- # Price -> Bikes.value + # + Price -> Bikes.value # Make -> Bikes.bike_brand_id # Model -> Bikes.bike_model_id - # to Whom -> ignore - # Zip Code -> ignore + # + to Whom -> ignore + # + Zip Code -> ignore # Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. # # Data at the end of the CSV seems to be missing a lot of fields. If any field is empty, the the value can be "UNDETERMINED" if applicable, or ignored. Any other dates beside "Date In/Out" can be current date. @@ -82,7 +82,7 @@ class BikeCsvImporter end def bike_attrs(bike_hash) - %i{ shop_id bike_purpose_id }.each_with_object({}) do |field, memo| + %i{ shop_id bike_purpose_id value }.each_with_object({}) do |field, memo| memo[field] = send :"bike_attr_#{ field }", bike_hash end end @@ -111,6 +111,10 @@ class BikeCsvImporter %w{ yes yeah y }.include? clean_value(bike_hash['gone']).try :downcase end + def bike_attr_value(bike_hash) + clean_value(bike_hash['price']).try(:gsub, /[$]/, '').try :to_i + end + def clean_value(value) value_or_nil strip_value(value) end From c17159835497438c6adc219e5ac0e4ec54dfcecd Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:08:16 +0600 Subject: [PATCH 05/17] BikeCsvImporter: add "bike_brand_id", "bike_model_id", "model" --- app/models/bike_csv_importer.rb | 53 +++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index dc22774..bd193a5 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -9,6 +9,11 @@ class BikeCsvImporter def run result = {imported_count: 0, skipped_shop_ids: []} + + @bike_purpose_cache = {} + @bike_brand_cache = {} + @bike_model_cache = {} + fetch do |bike_hash| bike = import_bike bike_hash if bike.try :persisted? 
@@ -17,6 +22,13 @@ class BikeCsvImporter result[:skipped_shop_ids].push bike.try(:shop_id) || bike_hash.values.first end end + + missing_brands = @bike_brand_cache.select { |_, v| v.nil? }.map(&:first) + result[:missing_brands] = missing_brands if missing_brands.any? + + missing_models = @bike_model_cache.select { |_, v| v.nil? }.map(&:first) + result[:missing_models] = missing_models if missing_models.any? + result end @@ -67,8 +79,8 @@ class BikeCsvImporter # Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" # Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. # + Price -> Bikes.value - # Make -> Bikes.bike_brand_id - # Model -> Bikes.bike_model_id + # + Make -> Bikes.bike_brand_id + # + Model -> Bikes.bike_model_id # + to Whom -> ignore # + Zip Code -> ignore # Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. @@ -82,7 +94,7 @@ class BikeCsvImporter end def bike_attrs(bike_hash) - %i{ shop_id bike_purpose_id value }.each_with_object({}) do |field, memo| + %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model }.each_with_object({}) do |field, memo| memo[field] = send :"bike_attr_#{ field }", bike_hash end end @@ -115,6 +127,22 @@ class BikeCsvImporter clean_value(bike_hash['price']).try(:gsub, /[$]/, '').try :to_i end + def bike_attr_bike_brand_id(bike_hash) + brand = clean_value(bike_hash['make']) + return unless brand + cached_bike_brand(brand).try :id + end + + def bike_attr_bike_model_id(bike_hash) + model = clean_value(bike_hash['model']) + return unless model + cached_bike_model(model).try :id + end + + def bike_attr_model(bike_hash) + clean_value bike_hash['model'] + end + def clean_value(value) value_or_nil strip_value(value) end @@ -128,7 +156,22 @@ class BikeCsvImporter end def cached_bike_purpose(purpose) - @bike_purpose_cache ||= {} - @bike_purpose_cache[purpose] ||= 
BikePurpose.find_by_purpose purpose + @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose + end + + def cached_bike_brand(brand) + if @bike_brand_cache.has_key? brand + @bike_brand_cache[brand] + else + @bike_brand_cache[brand] = BikeBrand.where('lower(brand) = ?', brand.downcase).first + end + end + + def cached_bike_model(model) + if @bike_model_cache.has_key? model + @bike_model_cache[model] + else + @bike_model_cache[model] = BikeModel.where('lower(model) = ?', model.downcase).first + end end end From 95dbe162f78bcf931711f5ed9515aee17bf96415 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:09:01 +0600 Subject: [PATCH 06/17] BikeCsvImporter: make Bike#bike_model_id field accessible and specify it as a foreign key explicitly --- app/models/bike.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/app/models/bike.rb b/app/models/bike.rb index ef6b1ba..5135938 100644 --- a/app/models/bike.rb +++ b/app/models/bike.rb @@ -1,13 +1,14 @@ class Bike < ActiveRecord::Base acts_as_loggable - attr_accessible :shop_id, :serial_number, :bike_brand_id, :model, :color, :bike_style_id, :seat_tube_height, - :top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo + attr_accessible :shop_id, :serial_number, :bike_brand_id, :bike_model_id, :model, :color, :bike_style_id, + :seat_tube_height, :top_tube_length, :bike_wheel_size_id, :value, :bike_condition_id, :bike_purpose_id, :photo has_many :transactions has_one :owner, :class_name => 'User' has_one :task_list, :as => :item, :dependent => :destroy belongs_to :bike_brand + belongs_to :bike_model belongs_to :bike_style belongs_to :bike_condition belongs_to :bike_purpose From 0083dd9bca984420d7c8ae9d74cbc7fa5da406e2 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:18:32 +0600 Subject: [PATCH 07/17] BikeCsvImporter: add dry run --- app/models/bike_csv_importer.rb | 18 +++++++++++------- lib/tasks/import.rake | 8 
++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index bd193a5..bd1bce5 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -1,14 +1,15 @@ require 'csv' class BikeCsvImporter - attr_reader :file + attr_reader :file, :dry_run - def initialize(file) - @file = file + def initialize(file, dry_run) + @file = file + @dry_run = !!dry_run end def run - result = {imported_count: 0, skipped_shop_ids: []} + result = {imported_count: 0, skipped_errors: {}} @bike_purpose_cache = {} @bike_brand_cache = {} @@ -16,10 +17,11 @@ class BikeCsvImporter fetch do |bike_hash| bike = import_bike bike_hash - if bike.try :persisted? + check_method = dry_run ? :valid? : :persisted? + if bike.try check_method result[:imported_count] += 1 else - result[:skipped_shop_ids].push bike.try(:shop_id) || bike_hash.values.first + result[:skipped_errors][bike.try(:shop_id) || bike_hash.values.first] = bike.try(:errors).try(:messages) end end @@ -90,7 +92,9 @@ class BikeCsvImporter # I realize the log entry stuff is likely complicated and time consuming. At a minimum, the most important columns to import are the following: Velocipede Number, Program, Gone, Make, Model. To avoid creating the log entry for "Gone", we would instead just set 'gone' to true. 
def import_bike(bike_hash) bike = Bike.new bike_attrs(bike_hash) - #bike.save + #bike.save unless dry_run + raise 'TODO save' unless dry_run + bike end def bike_attrs(bike_hash) diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 78b0425..f459413 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -1,11 +1,11 @@ namespace :import do namespace :bikes do # Imports bikes info from CSV file - task :csv, [:file] => :environment do |t, args| - file = args[:file] - next puts "Usage: rake #{t.name}[$csv_file_path]" unless file + task :csv, [:file, :dry_run] => :environment do |t, args| + file, dry_run = args.values_at :file, :dry_run + next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file next puts "File #{file} does not exist or is unreachable" unless File.readable? file - pp BikeCsvImporter.new(file).run + pp BikeCsvImporter.new(file, dry_run == 'dry').run end # Analyze a single field from CSV file From 802db2ba34b7f89249407692fc7db139b53469b4 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:26:47 +0600 Subject: [PATCH 08/17] BikeCsvImporter: print successful matches & fix "unknown" model --- app/models/bike_csv_importer.rb | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index bd1bce5..089a36f 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -9,7 +9,7 @@ class BikeCsvImporter end def run - result = {imported_count: 0, skipped_errors: {}} + result = {imported: {}, skipped: {}} @bike_purpose_cache = {} @bike_brand_cache = {} @@ -19,9 +19,9 @@ class BikeCsvImporter bike = import_bike bike_hash check_method = dry_run ? :valid? : :persisted? 
if bike.try check_method - result[:imported_count] += 1 + result[:imported][bike.shop_id] = bike.inspect else - result[:skipped_errors][bike.try(:shop_id) || bike_hash.values.first] = bike.try(:errors).try(:messages) + result[:skipped][bike.try(:shop_id) || bike_hash.values.first] = bike.try(:errors).try(:messages) end end @@ -144,7 +144,8 @@ class BikeCsvImporter end def bike_attr_model(bike_hash) - clean_value bike_hash['model'] + model = clean_value bike_hash['model'] + model unless model =~ /unknown/i end def clean_value(value) From cfe81d6e65d5fa1cc18aa8600ada2ccc30795d4c Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:38:24 +0600 Subject: [PATCH 09/17] BikeCsvImporter: add dummy field values for "bike_condition_id", "bike_wheel_size_id", "bike_style_id", "serial_number", "seat_tube_height" --- app/models/bike_csv_importer.rb | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index 089a36f..cac2963 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -98,7 +98,7 @@ class BikeCsvImporter end def bike_attrs(bike_hash) - %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model }.each_with_object({}) do |field, memo| + %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model bike_style_id bike_condition_id seat_tube_height bike_wheel_size_id serial_number }.each_with_object({}) do |field, memo| memo[field] = send :"bike_attr_#{ field }", bike_hash end end @@ -148,6 +148,26 @@ class BikeCsvImporter model unless model =~ /unknown/i end + def bike_attr_bike_style_id(_) + @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id + end + + def bike_attr_bike_condition_id(_) + @bike_condition_undertermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id + end + + def bike_attr_seat_tube_height(_) + 0 + end + + def bike_attr_bike_wheel_size_id(_) + 
@bike_condition_wheel_size_undertermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id + end + + def bike_attr_serial_number(_) + 'UNDETERMINED' + end + def clean_value(value) value_or_nil strip_value(value) end From b5e8aa554f7e178f70dc26857e8d032f43936df3 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:48:37 +0600 Subject: [PATCH 10/17] BikeCsvImporter: refactor by splitting into parts --- app/models/bike_csv_importer.rb | 130 +-------------------- app/models/bike_csv_importer/bike_attrs.rb | 78 +++++++++++++ app/models/bike_csv_importer/cache.rb | 26 +++++ app/models/bike_csv_importer/cleaner.rb | 15 +++ 4 files changed, 124 insertions(+), 125 deletions(-) create mode 100644 app/models/bike_csv_importer/bike_attrs.rb create mode 100644 app/models/bike_csv_importer/cache.rb create mode 100644 app/models/bike_csv_importer/cleaner.rb diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index cac2963..f9694b3 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -1,6 +1,11 @@ require 'csv' class BikeCsvImporter + + include BikeCsvImporter::Cache + include BikeCsvImporter::Cleaner + include BikeCsvImporter::BikeAttrs + attr_reader :file, :dry_run def initialize(file, dry_run) @@ -11,10 +16,6 @@ class BikeCsvImporter def run result = {imported: {}, skipped: {}} - @bike_purpose_cache = {} - @bike_brand_cache = {} - @bike_model_cache = {} - fetch do |bike_hash| bike = import_bike bike_hash check_method = dry_run ? :valid? : :persisted? @@ -25,12 +26,6 @@ class BikeCsvImporter end end - missing_brands = @bike_brand_cache.select { |_, v| v.nil? }.map(&:first) - result[:missing_brands] = missing_brands if missing_brands.any? - - missing_models = @bike_model_cache.select { |_, v| v.nil? }.map(&:first) - result[:missing_models] = missing_models if missing_models.any? 
- result end @@ -70,9 +65,6 @@ class BikeCsvImporter @header.zip(row).to_h end - # + Velocipede Number -> Bikes.shop_id - # + Program -> Bikes.bike_purpose_id - # # Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: # id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at # 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 @@ -80,123 +72,11 @@ class BikeCsvImporter # # Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" # Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. - # + Price -> Bikes.value - # + Make -> Bikes.bike_brand_id - # + Model -> Bikes.bike_model_id - # + to Whom -> ignore - # + Zip Code -> ignore # Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. - # - # Data at the end of the CSV seems to be missing a lot of fields. If any field is empty, the the value can be "UNDETERMINED" if applicable, or ignored. Any other dates beside "Date In/Out" can be current date. - # - # I realize the log entry stuff is likely complicated and time consuming. At a minimum, the most important columns to import are the following: Velocipede Number, Program, Gone, Make, Model. To avoid creating the log entry for "Gone", we would instead just set 'gone' to true. 
def import_bike(bike_hash) bike = Bike.new bike_attrs(bike_hash) #bike.save unless dry_run raise 'TODO save' unless dry_run bike end - - def bike_attrs(bike_hash) - %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model bike_style_id bike_condition_id seat_tube_height bike_wheel_size_id serial_number }.each_with_object({}) do |field, memo| - memo[field] = send :"bike_attr_#{ field }", bike_hash - end - end - - def bike_attr_shop_id(bike_hash) - bike_hash['velocipede number'].to_i - end - - def bike_attr_bike_purpose_id(bike_hash) - map = { - 'SALE' => /shop|as(-|\s+)is|safety\s*check/, - 'BUILDBIKE' => /build|bikes.*world/, - 'STORAGE' => nil, - 'PARTS' => /part|frame/, - 'SCRAP' => /scrap|strip/, - } - - default = 'UNDETERMINED' - test_value = clean_value(bike_hash['program']).try :downcase - value = map.find { |_, regexp| regexp.try :match, test_value }.try :first - - cached_bike_purpose(value || default).id - end - - def bike_attr_gone(bike_hash) - %w{ yes yeah y }.include? 
clean_value(bike_hash['gone']).try :downcase - end - - def bike_attr_value(bike_hash) - clean_value(bike_hash['price']).try(:gsub, /[$]/, '').try :to_i - end - - def bike_attr_bike_brand_id(bike_hash) - brand = clean_value(bike_hash['make']) - return unless brand - cached_bike_brand(brand).try :id - end - - def bike_attr_bike_model_id(bike_hash) - model = clean_value(bike_hash['model']) - return unless model - cached_bike_model(model).try :id - end - - def bike_attr_model(bike_hash) - model = clean_value bike_hash['model'] - model unless model =~ /unknown/i - end - - def bike_attr_bike_style_id(_) - @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id - end - - def bike_attr_bike_condition_id(_) - @bike_condition_undertermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id - end - - def bike_attr_seat_tube_height(_) - 0 - end - - def bike_attr_bike_wheel_size_id(_) - @bike_condition_wheel_size_undertermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id - end - - def bike_attr_serial_number(_) - 'UNDETERMINED' - end - - def clean_value(value) - value_or_nil strip_value(value) - end - - def strip_value(value) - value.try(:strip).try(:gsub, /\n|\r/, '') - end - - def value_or_nil(value) - return value unless ['?', 'n/a', 'missing', 'unknown', ''].include? value.try(:downcase) - end - - def cached_bike_purpose(purpose) - @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose - end - - def cached_bike_brand(brand) - if @bike_brand_cache.has_key? brand - @bike_brand_cache[brand] - else - @bike_brand_cache[brand] = BikeBrand.where('lower(brand) = ?', brand.downcase).first - end - end - - def cached_bike_model(model) - if @bike_model_cache.has_key? 
model - @bike_model_cache[model] - else - @bike_model_cache[model] = BikeModel.where('lower(model) = ?', model.downcase).first - end - end end diff --git a/app/models/bike_csv_importer/bike_attrs.rb b/app/models/bike_csv_importer/bike_attrs.rb new file mode 100644 index 0000000..dff8897 --- /dev/null +++ b/app/models/bike_csv_importer/bike_attrs.rb @@ -0,0 +1,78 @@ +class BikeCsvImporter + module BikeAttrs + def bike_attr_fields + %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model bike_style_id bike_condition_id seat_tube_height bike_wheel_size_id serial_number } + end + + def bike_attrs(bike_hash) + bike_attr_fields.each_with_object({}) do |field, memo| + memo[field] = send :"bike_attr_#{ field }", bike_hash + end + end + + def bike_attr_shop_id(bike_hash) + bike_hash['velocipede number'].to_i + end + + def bike_attr_bike_purpose_id(bike_hash) + map = { + 'SALE' => /shop|as(-|\s+)is|safety\s*check/, + 'BUILDBIKE' => /build|bikes.*world/, + 'STORAGE' => nil, + 'PARTS' => /part|frame/, + 'SCRAP' => /scrap|strip/, + } + + default = 'UNDETERMINED' + test_value = clean_value(bike_hash['program']).try :downcase + value = map.find { |_, regexp| regexp.try :match, test_value }.try :first + + cached_bike_purpose(value || default).id + end + + def bike_attr_gone(bike_hash) + %w{ yes yeah y }.include? 
clean_value(bike_hash['gone']).try :downcase + end + + def bike_attr_value(bike_hash) + clean_value(bike_hash['price']).try(:gsub, /[$]/, '').try :to_i + end + + def bike_attr_bike_brand_id(bike_hash) + brand = clean_value(bike_hash['make']) + return unless brand + cached_bike_brand(brand).try :id + end + + def bike_attr_bike_model_id(bike_hash) + model = clean_value(bike_hash['model']) + return unless model + cached_bike_model(model).try :id + end + + def bike_attr_model(bike_hash) + model = clean_value bike_hash['model'] + model unless model =~ /unknown/i + end + + def bike_attr_bike_style_id(_) + @bike_style_other_cache ||= BikeStyle.find_by_style('OTHER').id + end + + def bike_attr_bike_condition_id(_) + @bike_condition_undertermined_cache ||= BikeCondition.find_by_condition('UNDETERMINED').id + end + + def bike_attr_seat_tube_height(_) + 0 + end + + def bike_attr_bike_wheel_size_id(_) + @bike_condition_wheel_size_undertermined_cache ||= BikeWheelSize.find_by_description('UNDETERMINED').id + end + + def bike_attr_serial_number(_) + 'UNDETERMINED' + end + end +end diff --git a/app/models/bike_csv_importer/cache.rb b/app/models/bike_csv_importer/cache.rb new file mode 100644 index 0000000..960f37a --- /dev/null +++ b/app/models/bike_csv_importer/cache.rb @@ -0,0 +1,26 @@ +class BikeCsvImporter + module Cache + def cached_bike_purpose(purpose) + @bike_purpose_cache ||= {} + @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose + end + + def cached_bike_brand(brand) + @bike_brand_cache ||= {} + if @bike_brand_cache.has_key? brand + @bike_brand_cache[brand] + else + @bike_brand_cache[brand] = BikeBrand.where('lower(brand) = ?', brand.downcase).first + end + end + + def cached_bike_model(model) + @bike_model_cache ||= {} + if @bike_model_cache.has_key? 
model + @bike_model_cache[model] + else + @bike_model_cache[model] = BikeModel.where('lower(model) = ?', model.downcase).first + end + end + end +end diff --git a/app/models/bike_csv_importer/cleaner.rb b/app/models/bike_csv_importer/cleaner.rb new file mode 100644 index 0000000..1b745eb --- /dev/null +++ b/app/models/bike_csv_importer/cleaner.rb @@ -0,0 +1,15 @@ +class BikeCsvImporter + module Cleaner + def clean_value(value) + value_or_nil strip_value(value) + end + + def strip_value(value) + value.try(:strip).try(:gsub, /\n|\r/, '') + end + + def value_or_nil(value) + return value unless ['?', 'n/a', 'missing', 'unknown', ''].include? value.try(:downcase) + end + end +end From 0a338a248590722b0167597db8e92fca092a72eb Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 00:55:26 +0600 Subject: [PATCH 11/17] BikeCsvImporter: refactor BikeAttrs for methods to receive a single value, not the whole hash --- app/models/bike_csv_importer/bike_attrs.rb | 54 +++++++++++++--------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/app/models/bike_csv_importer/bike_attrs.rb b/app/models/bike_csv_importer/bike_attrs.rb index dff8897..facd373 100644 --- a/app/models/bike_csv_importer/bike_attrs.rb +++ b/app/models/bike_csv_importer/bike_attrs.rb @@ -1,20 +1,33 @@ class BikeCsvImporter module BikeAttrs def bike_attr_fields - %i{ shop_id bike_purpose_id value bike_brand_id bike_model_id model bike_style_id bike_condition_id seat_tube_height bike_wheel_size_id serial_number } + { + shop_id: 'velocipede number', + bike_purpose_id: 'program', + #gone: 'gone', + value: 'price', + bike_brand_id: 'make', + bike_model_id: 'model', + model: 'model', + bike_style_id: nil, + bike_condition_id: nil, + seat_tube_height: nil, + bike_wheel_size_id: nil, + serial_number: nil, + } end def bike_attrs(bike_hash) - bike_attr_fields.each_with_object({}) do |field, memo| - memo[field] = send :"bike_attr_#{ field }", bike_hash + bike_attr_fields.each_with_object({}) do 
|(model_field, csv_field), memo| + memo[model_field] = send :"bike_attr_#{ model_field }", clean_value(bike_hash[csv_field]) end end - def bike_attr_shop_id(bike_hash) - bike_hash['velocipede number'].to_i + def bike_attr_shop_id(value) + value.to_i end - def bike_attr_bike_purpose_id(bike_hash) + def bike_attr_bike_purpose_id(value) map = { 'SALE' => /shop|as(-|\s+)is|safety\s*check/, 'BUILDBIKE' => /build|bikes.*world/, @@ -24,35 +37,32 @@ class BikeCsvImporter } default = 'UNDETERMINED' - test_value = clean_value(bike_hash['program']).try :downcase + test_value = value.try :downcase value = map.find { |_, regexp| regexp.try :match, test_value }.try :first cached_bike_purpose(value || default).id end - def bike_attr_gone(bike_hash) - %w{ yes yeah y }.include? clean_value(bike_hash['gone']).try :downcase + def bike_attr_gone(value) + %w{ yes yeah y }.include? value.try :downcase end - def bike_attr_value(bike_hash) - clean_value(bike_hash['price']).try(:gsub, /[$]/, '').try :to_i + def bike_attr_value(value) + value.try(:gsub, /[$]/, '').try :to_i end - def bike_attr_bike_brand_id(bike_hash) - brand = clean_value(bike_hash['make']) - return unless brand - cached_bike_brand(brand).try :id + def bike_attr_bike_brand_id(value) + return unless value + cached_bike_brand(value).try :id end - def bike_attr_bike_model_id(bike_hash) - model = clean_value(bike_hash['model']) - return unless model - cached_bike_model(model).try :id + def bike_attr_bike_model_id(value) + return unless value + cached_bike_model(value).try :id end - def bike_attr_model(bike_hash) - model = clean_value bike_hash['model'] - model unless model =~ /unknown/i + def bike_attr_model(value) + value if value && value !~ /unknown/i end def bike_attr_bike_style_id(_) From c1ca5012043fb1f8148712463dbaf593eb2ee9ed Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 21:25:24 +0600 Subject: [PATCH 12/17] BikeCsvImporter: fix incorrect method signatures --- app/models/bike_csv_importer.rb | 
37 +++++++++++++++------------------ lib/tasks/import.rake | 4 ++-- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index f9694b3..cf80a0f 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -1,24 +1,32 @@ require 'csv' +# Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: +# id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at +# 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 +# See https://github.com/spacemunkay/BikeShed/blob/master/app/components/bike_logs.rb#L12-L18 for example. Use user_id 1 for current_user_id (1 should be the admin ID I think). Use "Date Out" column for start_date & end_date. Set action_id to "COMPLETED". +# +# Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" +# Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. +# Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. + class BikeCsvImporter include BikeCsvImporter::Cache include BikeCsvImporter::Cleaner include BikeCsvImporter::BikeAttrs - attr_reader :file, :dry_run + attr_reader :file - def initialize(file, dry_run) - @file = file - @dry_run = !!dry_run + def initialize(file) + @file = file end - def run + def run(dry_run) result = {imported: {}, skipped: {}} fetch do |bike_hash| - bike = import_bike bike_hash - check_method = dry_run ? :valid? : :persisted? + bike = new_bike bike_hash + check_method = dry_run ? :valid? 
: :save if bike.try check_method result[:imported][bike.shop_id] = bike.inspect else @@ -65,18 +73,7 @@ class BikeCsvImporter @header.zip(row).to_h end - # Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: - # id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at - # 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 - # See https://github.com/spacemunkay/BikeShed/blob/master/app/components/bike_logs.rb#L12-L18 for example. Use user_id 1 for current_user_id (1 should be the admin ID I think). Use "Date Out" column for start_date & end_date. Set action_id to "COMPLETED". - # - # Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". Set action_id to "AQUIRED" - # Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. - # Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. - def import_bike(bike_hash) - bike = Bike.new bike_attrs(bike_hash) - #bike.save unless dry_run - raise 'TODO save' unless dry_run - bike + def new_bike(bike_hash) + Bike.new bike_attrs(bike_hash) end end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index f459413..7a28b57 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -5,7 +5,7 @@ namespace :import do file, dry_run = args.values_at :file, :dry_run next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file next puts "File #{file} does not exist or is unreachable" unless File.readable? 
file - pp BikeCsvImporter.new(file, dry_run == 'dry').run + BikeCsvImporter.new(file).run dry_run == 'dry' end # Analyze a single field from CSV file @@ -13,7 +13,7 @@ namespace :import do file, field = args.values_at :file, :field next puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]" unless file next puts "File #{file} does not exist or is unreachable" unless File.readable? file - pp BikeCsvImporter.new(file).analyze field ? [field] : [] + BikeCsvImporter.new(file).analyze field ? [field] : [] end end end From 072f303c65a3e6820edb6a4afb0c3d914d6afbc4 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 21:41:07 +0600 Subject: [PATCH 13/17] BikeCsvImporter: refactor importer to output log messages to stdout --- app/models/bike_csv_importer.rb | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index cf80a0f..2ffe75b 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -22,22 +22,28 @@ class BikeCsvImporter end def run(dry_run) - result = {imported: {}, skipped: {}} + imported_count, skipped_count = 0, 0 + + puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import" fetch do |bike_hash| bike = new_bike bike_hash check_method = dry_run ? :valid? : :save if bike.try check_method - result[:imported][bike.shop_id] = bike.inspect + puts "Imported #{bike.shop_id}: #{bike}" + imported_count += 1 else - result[:skipped][bike.try(:shop_id) || bike_hash.values.first] = bike.try(:errors).try(:messages) + puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}" + skipped_count += 1 end end - result + puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV" end def analyze(fields = []) + puts "Analyzing CSV values frequency for #{fields.any? ?
fields.join(', ') + ' field' : 'all fields'}" + fields = fields.map &:downcase grouped = {} fetch do |bike_hash| @@ -48,7 +54,14 @@ class BikeCsvImporter grouped[key][value] += 1 end end - grouped + + grouped.each do |field, values| + puts "#{field}:" + values.each do |value, count| + puts "\t#{value.inspect}: #{count}" + end + puts "\tTotal of #{values.count} distinct values" + end end From ae79a346527f9071d8f5be0da8ddd98838fd9441 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 21:44:27 +0600 Subject: [PATCH 14/17] BikeCsvImporter: colorize importer output to highlight individual row statuses --- Gemfile | 1 + Gemfile.lock | 2 ++ app/models/bike_csv_importer.rb | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Gemfile b/Gemfile index 85b6137..ff271e5 100644 --- a/Gemfile +++ b/Gemfile @@ -35,6 +35,7 @@ group :development, :test do gem 'factory_girl_rails', '~> 1.2' gem 'pry', '~> 0.9.8' gem 'faker', '~> 1.2.0' + gem 'colorize' end group :test do diff --git a/Gemfile.lock b/Gemfile.lock index 3afc08d..8f14861 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -69,6 +69,7 @@ GEM coffee-script-source execjs coffee-script-source (1.10.0) + colorize (0.8.1) database_cleaner (1.2.0) decent_exposure (1.0.2) devise (2.0.6) @@ -255,6 +256,7 @@ DEPENDENCIES cancan capybara (~> 2.2.1) coffee-rails (~> 3.2.1) + colorize database_cleaner (~> 1.2.0) decent_exposure (~> 1.0.1) devise (~> 2.0.4) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index 2ffe75b..ed04149 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -30,10 +30,10 @@ class BikeCsvImporter bike = new_bike bike_hash check_method = dry_run ? :valid? 
: :save if bike.try check_method - puts "Imported #{bike.shop_id}: #{bike}" + puts "Imported #{bike.shop_id}: #{bike}".green imported_count += 1 else - puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}" + puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red skipped_count += 1 end end From a6a616cf6d9d9118b3ba9cdc8c9dc160cd8488b9 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 22:38:14 +0600 Subject: [PATCH 15/17] BikeCsvImporter: add status logs --- app/models/bike_csv_importer.rb | 24 ++++++++++------ app/models/bike_csv_importer/cache.rb | 5 ++++ app/models/bike_csv_importer/logs.rb | 41 +++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 9 deletions(-) create mode 100644 app/models/bike_csv_importer/logs.rb diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index ed04149..07fce50 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -1,19 +1,11 @@ require 'csv' -# Gone -> If "Yes", set 'gone' to true, then create a Log entry like the following: -# id | loggable_id | loggable_type | logger_id | logger_type | context | start_date | end_date | description | log_action_id | log_action_type | created_at | updated_at -# 18 | 1 | Bike | 4 | User | | 2017-02-03 23:27:00 | 2017-02-03 23:27:00 | Gone | 5 | ActsAsLoggable::BikeAction | 2017-02-03 23:27:36.8387 | 2017-02-03 23:27:36.8387 -# See https://github.com/spacemunkay/BikeShed/blob/master/app/components/bike_logs.rb#L12-L18 for example. Use user_id 1 for current_user_id (1 should be the admin ID I think). Use "Date Out" column for start_date & end_date. Set action_id to "COMPLETED". -# -# Date In -> Create a bike log entry with start_date & end_date with same value as "Date In". 
Set action_id to "AQUIRED" -# Date Out -> Should be the start_date & end_date value for "Gone" column mentioned above. -# Comment -> Create a bike log entry with action_id "NOTE". The log 'description' should be the value of 'Comment'. - class BikeCsvImporter include BikeCsvImporter::Cache include BikeCsvImporter::Cleaner include BikeCsvImporter::BikeAttrs + include BikeCsvImporter::Logs attr_reader :file @@ -31,6 +23,16 @@ class BikeCsvImporter check_method = dry_run ? :valid? : :save if bike.try check_method puts "Imported #{bike.shop_id}: #{bike}".green + + logs = new_logs_entries bike, bike_hash + logs.each do |log| + if log.send check_method + puts "\tLog entry created: #{log.inspect}".green + else + puts "\tLog entry creation failed: #{log.errors.full_messages.join '; '}".red + end + end + imported_count += 1 else puts "Skipped #{bike.try(:shop_id) || bike_hash.values.first}: #{bike.try(:errors).try(:full_messages).try :join, '; '}".red @@ -89,4 +91,8 @@ class BikeCsvImporter def new_bike(bike_hash) Bike.new bike_attrs(bike_hash) end + + def new_logs_entries(bike, bike_hash) + %i{ acquired comment gone }.map { |x| send :"log_entry_#{x}", bike, bike_hash }.compact + end end diff --git a/app/models/bike_csv_importer/cache.rb b/app/models/bike_csv_importer/cache.rb index 960f37a..a90c947 100644 --- a/app/models/bike_csv_importer/cache.rb +++ b/app/models/bike_csv_importer/cache.rb @@ -22,5 +22,10 @@ class BikeCsvImporter @bike_model_cache[model] = BikeModel.where('lower(model) = ?', model.downcase).first end end + + def cached_log_bike_action(action) + @log_bike_action_id_cache ||= {} + @log_bike_action_id_cache[action] ||= ActsAsLoggable::BikeAction.find_by_action(action) + end end end diff --git a/app/models/bike_csv_importer/logs.rb b/app/models/bike_csv_importer/logs.rb new file mode 100644 index 0000000..a965c61 --- /dev/null +++ b/app/models/bike_csv_importer/logs.rb @@ -0,0 +1,41 @@ +class BikeCsvImporter + module Logs + def log_entry_gone(bike, 
bike_hash) + if clean_value(bike_hash['gone']).to_s =~ /y/i + log_entry bike, log_entry_date(clean_value(bike_hash['date out'])), 'COMPLETED', 'Gone' + end + end + + def log_entry_acquired(bike, bike_hash) + if clean_value(bike_hash['date in']) + log_entry bike, log_entry_date(clean_value(bike_hash['date in'])), 'ACQUIRED' + end + end + + def log_entry_comment(bike, bike_hash) + if clean_value(bike_hash['comment']).present? + log_entry bike, nil, 'NOTE', clean_value(bike_hash['comment']) + end + end + + def log_entry_date(value) + return unless value + Date.strptime value, '%m/%d/%y' rescue nil + end + + def log_entry(bike, date, type, description = nil) + date ||= DateTime.now + bike_action = cached_log_bike_action(type) + + ActsAsLoggable::Log.new( + loggable_type: bike.class.to_s, + loggable_id: bike.id || bike.shop_id.to_i, # for dry run + log_action_type: bike_action.class.to_s, + log_action_id: bike_action.id, + start_date: date, + end_date: date, + description: description, + ) + end + end +end From b7729ae29aa563c0259451b46c521045684f7b6c Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 20 Mar 2017 22:49:45 +0600 Subject: [PATCH 16/17] BikeCsvImporter: add comments --- app/models/bike_csv_importer.rb | 33 ++++++++++++++++++++++ app/models/bike_csv_importer/bike_attrs.rb | 1 + app/models/bike_csv_importer/cache.rb | 1 + app/models/bike_csv_importer/cleaner.rb | 1 + app/models/bike_csv_importer/logs.rb | 1 + lib/tasks/import.rake | 6 ++++ 6 files changed, 43 insertions(+) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index 07fce50..da51a49 100644 --- a/app/models/bike_csv_importer.rb +++ b/app/models/bike_csv_importer.rb @@ -1,5 +1,6 @@ require 'csv' +# Imports data from CSV file into the bikes database. 
class BikeCsvImporter include BikeCsvImporter::Cache @@ -9,10 +10,16 @@ class BikeCsvImporter attr_reader :file + # Default constructor + # + # @param [String] file Path to the CSV file def initialize(file) @file = file end + # Runs the import. Will print out progress to stdout + # + # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation def run(dry_run) imported_count, skipped_count = 0, 0 @@ -43,6 +50,9 @@ class BikeCsvImporter puts "#{imported_count} bikes imported, #{skipped_count} bikes skipped, total of #{imported_count + skipped_count} rows in the CSV" end + # Analyzes and prints out the input CSV file values + # + # @param [Array] fields If passed, analyze only the given fields (names are down cased) def analyze(fields = []) puts "Analyzing CSV values frequency for #{fields.any? ? fields.join(', ') + ' field' : 'all fields'}" @@ -70,6 +80,10 @@ class BikeCsvImporter private + # Parses the CSV header & rows, yielding a block for each row (except the header) + # Header is down cased! + # + # @param [Proc] &block The block to yield to def fetch CSV.foreach(file).each_with_index do |row, i| if i.zero? 
@@ -80,18 +94,37 @@ class BikeCsvImporter end end + # Parses & stores the input header, down casing by the way + # + # @param [Array] row def parse_header(row) @header = row.map(&:downcase) end + # Parses the input row into a hash with keys from the header, @see #parse_header + # + # @param [Array] row + # + # @return [Hash] def parse_bike(row) @header.zip(row).to_h end + # Constructs a new Bike instance from the given hash from a CSV row + # + # @param [Hash] bike_hash + # + # @return [Bike] def new_bike(bike_hash) Bike.new bike_attrs(bike_hash) end + # Constructs new Bike Log Entries instances from the given hash from a CSV row + # + # @param [Bike] bike The Bike instance to construct log entries for + # @param [Hash] bike_hash The input hash from a CSV row + # + # @return [Array] def new_logs_entries(bike, bike_hash) %i{ acquired comment gone }.map { |x| send :"log_entry_#{x}", bike, bike_hash }.compact end diff --git a/app/models/bike_csv_importer/bike_attrs.rb b/app/models/bike_csv_importer/bike_attrs.rb index facd373..375d90c 100644 --- a/app/models/bike_csv_importer/bike_attrs.rb +++ b/app/models/bike_csv_importer/bike_attrs.rb @@ -1,3 +1,4 @@ +# Helper module to create various Bike instance fields from a CSV row hash class BikeCsvImporter module BikeAttrs def bike_attr_fields diff --git a/app/models/bike_csv_importer/cache.rb b/app/models/bike_csv_importer/cache.rb index a90c947..33b36a6 100644 --- a/app/models/bike_csv_importer/cache.rb +++ b/app/models/bike_csv_importer/cache.rb @@ -1,3 +1,4 @@ +# Helper module to create various cached instances for bike CSV imports class BikeCsvImporter module Cache def cached_bike_purpose(purpose) diff --git a/app/models/bike_csv_importer/cleaner.rb b/app/models/bike_csv_importer/cleaner.rb index 1b745eb..65794e7 100644 --- a/app/models/bike_csv_importer/cleaner.rb +++ b/app/models/bike_csv_importer/cleaner.rb @@ -1,3 +1,4 @@ +# Helper module to clean the incoming data from CSV fields class BikeCsvImporter module
Cleaner def clean_value(value) diff --git a/app/models/bike_csv_importer/logs.rb b/app/models/bike_csv_importer/logs.rb index a965c61..cbc7d92 100644 --- a/app/models/bike_csv_importer/logs.rb +++ b/app/models/bike_csv_importer/logs.rb @@ -1,3 +1,4 @@ +# Helper module to create ActsAsLoggable log entries for a Bike instance from a CSV row hash class BikeCsvImporter module Logs def log_entry_gone(bike, bike_hash) diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 7a28b57..d9a3b55 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -1,6 +1,9 @@ namespace :import do namespace :bikes do # Imports bikes info from CSV file + # + # rake import:bikes:csv[import.csv,dry] # dry run + # rake import:bikes:csv[import.csv] # live import task :csv, [:file, :dry_run] => :environment do |t, args| file, dry_run = args.values_at :file, :dry_run next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file @@ -9,6 +12,9 @@ namespace :import do end # Analyze a single field from CSV file + # + # rake import:bikes:analyze_csv[import.csv] # dumps all fields data + # rake import:bikes:analyze_csv[import.csv,"date in"] # shows only single field task :analyze_csv, [:file, :field] => :environment do |t, args| file, field = args.values_at :file, :field next puts "Usage: rake #{t.name}[$csv_file_path[,\"$field_name\"]]" unless file From 0f1dab1ad1fd15611d8c9f0ebf6362b336bc5672 Mon Sep 17 00:00:00 2001 From: Ilya Konanykhin Date: Mon, 27 Mar 2017 10:59:07 +0600 Subject: [PATCH 17/17] BikeCsvImporter: add import of brands as a separate step --- app/models/bike_csv_importer.rb | 31 +++++++++++++++++++++- app/models/bike_csv_importer/bike_attrs.rb | 10 ++++--- app/models/bike_csv_importer/cache.rb | 7 +++-- lib/tasks/import.rake | 11 ++++++++ 4 files changed, 53 insertions(+), 6 deletions(-) diff --git a/app/models/bike_csv_importer.rb b/app/models/bike_csv_importer.rb index da51a49..5b7c157 100644 --- a/app/models/bike_csv_importer.rb +++ 
b/app/models/bike_csv_importer.rb @@ -26,8 +26,9 @@ class BikeCsvImporter puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of import" fetch do |bike_hash| - bike = new_bike bike_hash + bike = new_bike bike_hash check_method = dry_run ? :valid? : :save + if bike.try check_method puts "Imported #{bike.shop_id}: #{bike}".green @@ -76,6 +77,34 @@ class BikeCsvImporter end end + # Imports new brands from CSV file (field 'make'). Will print out progress to stdout + # + # @param [Boolean] dry_run If true, does not save data, only shows the progress of validation + def brands(dry_run) + created_count, skipped_count = 0, 0 + + puts "Performing a #{dry_run ? 'DRY RUN' : 'LIVE RUN'} of brands import" + + fetch do |bike_hash| + make = clean_value bike_hash['make'] + brand = bike_attr_bike_brand make, true + check_method = dry_run ? :valid? : :save + + if brand.try :persisted? + puts "Skipped already existing brand #{brand.brand}" + skipped_count +=1 + elsif brand.try check_method + puts "Created brand #{brand.brand}".green + created_count += 1 + else + puts "Skipped #{brand.try(:brand) || make}: #{brand.try(:errors).try(:full_messages).try(:join, '; ') || 'object not created'}".red + skipped_count += 1 + end + end + + puts "#{created_count} brand created, #{skipped_count} brand skipped, total of #{created_count + skipped_count} rows in the CSV" + end + private diff --git a/app/models/bike_csv_importer/bike_attrs.rb b/app/models/bike_csv_importer/bike_attrs.rb index 375d90c..a478406 100644 --- a/app/models/bike_csv_importer/bike_attrs.rb +++ b/app/models/bike_csv_importer/bike_attrs.rb @@ -20,7 +20,7 @@ class BikeCsvImporter def bike_attrs(bike_hash) bike_attr_fields.each_with_object({}) do |(model_field, csv_field), memo| - memo[model_field] = send :"bike_attr_#{ model_field }", clean_value(bike_hash[csv_field]) + memo[model_field] = send :"bike_attr_#{model_field}", clean_value(bike_hash[csv_field]) end end @@ -52,9 +52,13 @@ class BikeCsvImporter value.try(:gsub, 
/[$]/, '').try :to_i end + def bike_attr_bike_brand(value, new_if_empty = false) + value = 'Unknown' if !value || value =~ /\Aunknown/i + cached_bike_brand value, new_if_empty + end + def bike_attr_bike_brand_id(value) - return unless value - cached_bike_brand(value).try :id + bike_attr_bike_brand(value, false).try :id end def bike_attr_bike_model_id(value) diff --git a/app/models/bike_csv_importer/cache.rb b/app/models/bike_csv_importer/cache.rb index 33b36a6..6a0e3a7 100644 --- a/app/models/bike_csv_importer/cache.rb +++ b/app/models/bike_csv_importer/cache.rb @@ -6,12 +6,15 @@ class BikeCsvImporter @bike_purpose_cache[purpose] ||= BikePurpose.find_by_purpose purpose end - def cached_bike_brand(brand) + def cached_bike_brand(brand, new_if_empty = false) @bike_brand_cache ||= {} if @bike_brand_cache.has_key? brand @bike_brand_cache[brand] else - @bike_brand_cache[brand] = BikeBrand.where('lower(brand) = ?', brand.downcase).first + bike_brand = BikeBrand.where('lower(brand) = ?', brand.downcase).first + bike_brand ||= BikeBrand.new(brand: brand) if new_if_empty + + @bike_brand_cache[brand] = bike_brand end end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index d9a3b55..43367f3 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -21,5 +21,16 @@ namespace :import do next puts "File #{file} does not exist or is unreachable" unless File.readable? file BikeCsvImporter.new(file).analyze field ? [field] : [] end + + # Imports new brands from CSV file + # + # rake import:bikes:brands_csv[import.csv,dry] # dry run + # rake import:bikes:brands_csv[import.csv] # live import + task :brands_csv, [:file, :dry_run] => :environment do |t, args| + file, dry_run = args.values_at :file, :dry_run + next puts "Usage: rake #{t.name}[$csv_file_path[,$dry_run=dry]]" unless file + next puts "File #{file} does not exist or is unreachable" unless File.readable? file + BikeCsvImporter.new(file).brands dry_run == 'dry' + end end end