413 lines
14 KiB
Perl
Executable File
413 lines
14 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
use warnings;
|
|
use strict;
|
|
|
|
# PORTAL CHANGES - assumes you have roster permission for the State League group
|
|
# Since the portal came online, CSV columns are accessed by name rather than position
|
|
# to handle column reordering. The script parses the header row to create a mapping.
|
|
#
|
|
# Required columns (must exist in CSV):
|
|
# First Name, Last Name, Email, Phone, Mailing Street, Mailing City,
|
|
# Mailing State, Mailing Postal Code, Status
|
|
#
|
|
# Optional columns:
|
|
# League ID (if missing, assumes Members At Large)
|
|
# Joined or Original Join Date (for join date)
|
|
|
|
# Google Civic Information API
|
|
# Quota: Queries per minute
|
|
# Current limit: 151
|
|
#
|
|
# Use sleep to limit queries to 30 per minute
|
|
# Currently there are 187 members in the whole state
|
|
|
|
# command-line options: type roster-file [email|email-csv]
|
|
# type:
|
|
# google (all members with senate/delegate district query)
|
|
# ALL (all members without senate/delegate district query)
|
|
# League ID: WV000 (members at large) WV102 (Huntington) WV103 (Morgantown-Monongalia) WV107 (Wood County) WV112 (Jefferson)
|
|
# email (optional third arg):
|
|
# email (emails for specified type ALL (or google) or League ID)
|
|
|
|
# cpan App::cpanminus
|
|
# Now install any module you can find.
|
|
# cpanm Module::Name
|
|
use lib qw(/home/jr/perl5/lib/perl5); # adjust perl lib as required
|
|
use WWW::Curl::Simple;
|
|
use File::Glob ':glob';
|
|
use JSON;
|
|
use Data::Dumper;
|
|
use vars qw($key $STATE $stateLeagueID $localLeagueIDs);
|
|
require "./env";
|
|
|
|
# Configuration - Google API key for LWVWV.
# Put it in a file called "env" (loaded by the require above) containing a
# line such as:  $key = "thekeyfromgoogle";
my $googleApiKey = $key;

# First argument: report type ("google", "ALL", or a single League ID).
my $searchType = $ARGV[0];

# Roster files to process, expanded from the second argument.
my @files;

if ( !$ARGV[0] ) {
    help();
    exit;
}
else {
    # Anchor the match so that only an exact type / League ID is accepted
    # (an unanchored match would let e.g. "googlex" through and then
    # silently produce no output).
    if ( $ARGV[0] !~ /\A(?:google|ALL|$stateLeagueID|$localLeagueIDs)\z/ ) {
        print "WRONG FIRST ARGUMENT\n";
        help();
        print "WRONG FIRST ARGUMENT\n";
        exit 1;
    }

    # The roster file (or glob pattern) is mandatory.
    unless ( defined $ARGV[1] && length $ARGV[1] ) {
        print STDERR "MISSING SECOND ARGUMENT (roster file)\n";
        help();
        print STDERR "MISSING SECOND ARGUMENT (roster file)\n";
        exit 1;
    }

    @files = bsd_glob( $ARGV[1] );    # Expand wildcard patterns (e.g., *.csv)

    # A wildcard that matches nothing would otherwise end the run silently.
    unless (@files) {
        print STDERR "No roster files match '$ARGV[1]'\n";
        exit 1;
    }
}

# Optional third argument selects an email-only output format:
#   'email-csv'       -> email,first_name,last_name
#   any other value   -> First Last <email>
my $emailArg;
my $emailCsvArg;
if ( $ARGV[2] ) {
    if ( $ARGV[2] eq 'email-csv' ) {
        $emailCsvArg = 1;
    }
    else {
        $emailArg = $ARGV[2];
    }
}
|
|
|
|
# Process every roster file: map the header row to column indexes, then emit
# one line per qualifying member in the format chosen on the command line
# (full CSV, "First Last <email>", or "email,first,last").

$| = 1;    # unbuffered STDOUT, so rows appear while slow lookups are running

# Known-bad roster street lines and their corrections.
my %street_fix_for = (
    "309 2nd St Altizer"  => "309 2nd St",
    "!87 Gallaher Street" => "187 Gallaher Street",
);

# One HTTP client is reused for the whole run.
my $curl = WWW::Curl::Simple->new();

FILE:
foreach my $file (@files) {

    open my $fh, "<", $file or die "Cannot open $file: $!";

    # Read and parse the header line to create the column mapping
    my $header = <$fh>;
    unless ( defined $header ) {
        warn "ERROR: $file is empty, skipping\n";
        close $fh;
        next FILE;
    }
    $header =~ s/[\r\n]+\z//;          # tolerate CRLF line endings
    $header =~ s/\A\xEF\xBB\xBF//;     # drop a UTF-8 byte-order mark, if any
    print "Header: $header\n" unless $emailCsvArg;

    # Column name -> index mapping
    my @headers = parse_csv_line($header);
    my %col;
    for my $i ( 0 .. $#headers ) {
        my $name = $headers[$i];
        $name =~ s/^"//;
        $name =~ s/"$//;
        $name =~ s/^\s+|\s+$//g;       # Trim whitespace
        $col{$name} = $i;
    }

    # Verify required columns exist
    my @required_cols = (
        'First Name',   'Last Name',     'Email',
        'Phone',        'Mailing Street', 'Mailing City',
        'Mailing State', 'Mailing Postal Code', 'Status',
    );
    my @missing = grep { !exists $col{$_} } @required_cols;
    if (@missing) {
        warn "ERROR: Required columns not found in $file:\n";
        warn " Missing: " . join( ", ", @missing ) . "\n";
        warn " Available columns: " . join( ", ", @headers ) . "\n";
        close $fh;
        next FILE;                     # Skip this file
    }

    # Determine file type based on League ID column presence
    my $localLeague = exists $col{'League ID'} ? 1 : 0;
    if ($localLeague) {
        print "Processing file: $file ... League ID column found\n"
          unless $emailCsvArg;
    }
    else {
        print "Processing file: $file ... No League ID column, must be MAL\n"
          unless $emailCsvArg;
    }

    # The join-date column is optional and named differently in the two
    # roster exports; prefer the name that belongs to this file type and
    # fall back to the other one.
    my ($join_col) = grep { exists $col{$_} } (
        $localLeague
        ? ( 'Joined', 'Original Join Date' )
        : ( 'Original Join Date', 'Joined' )
    );

    print
"Name,Email,Phone,Address,Delegate District,Senate District,League ID,Join Date\n"
      unless $emailCsvArg;

  ROW:
    while ( my $line = <$fh> ) {

        my @fields = parse_csv_line($line);
        next ROW unless @fields;       # blank line

        # Fetch a field by column name with surrounding quotes removed.
        # Rows may be shorter than the header because trailing empty fields
        # are dropped by the parser, so a missing value is treated as ''.
        my $field = sub {
            my ($name) = @_;
            my $idx = $col{$name};
            return '' unless defined $idx;
            my $val = $fields[$idx];
            return '' unless defined $val;
            $val =~ s/^"//;
            $val =~ s/"$//;
            return $val;
        };

        my $firstName = $field->('First Name');
        my $lastName  = $field->('Last Name');
        my $email     = $field->('Email');
        my $phone     = $field->('Phone');
        my $status    = $field->('Status');
        my $street    = $field->('Mailing Street');
        my $city      = $field->('Mailing City');
        my $state     = $field->('Mailing State');
        my $zip       = $field->('Mailing Postal Code');
        my $joinDate  = defined $join_col ? $field->($join_col) : '';

        # Members-at-large files carry no League ID column
        my $leagueId = $localLeague ? $field->('League ID') : $stateLeagueID;
        $leagueId = '' unless defined $leagueId;

        if ( $firstName eq '' && $lastName eq '' ) {
            warn
"ERROR: Missing required First Name or Last Name in record, skipping\n";
            next ROW;
        }

        # Skip header rows repeated inside the data
        next ROW if $street eq "Mailing Street";

        # Keep Primary (Primary, Primary - Life, etc.) and Lapsed members
        next ROW unless $status =~ /^Primary|Lapsed/;

        # Only members belonging to a League of this state
        next ROW if $leagueId !~ /^$STATE/;

        # A League ID search type restricts output to that League
        if ( $searchType ne "google" && $searchType ne "ALL" ) {
            next ROW if $leagueId ne $searchType;
        }

        # Commas would break the unquoted CSV output
        $street =~ s/,//g;

        if ($emailCsvArg) {

            # CSV format: email,first_name,last_name
            print "$email,$firstName,$lastName\n" if email($email);
            next ROW;
        }

        if ($emailArg) {

            # Original email format: First Last <email>
            print "$firstName $lastName <$email>\n" if email($email);
            next ROW;
        }

        # Full CSV output with all fields; every column is followed by a comma
        my @row = (
            "$firstName $lastName",
            ( email($email) ? $email : '' ),
            ( $phone        ? $phone : '' ),
        );

        if ($street) {

            # Correct known incorrect addresses
            $street = $street_fix_for{$street}
              if exists $street_fix_for{$street};
            push @row, "$street $city $state $zip";
        }
        else {
            push @row, "$city $state $zip";
        }

        if ( $searchType eq "google" ) {

            # Always emit both district columns plus the League ID so that
            # every row lines up with the header, even without a street or
            # when the lookup returns nothing.
            my ( $delegate, $senate ) = ( '', '' );

            # Out-of-state addresses never yield usable districts, so do
            # not spend API quota (and the throttle delay) on them.
            if ( $street && $state eq $STATE ) {
                ( $delegate, $senate ) =
                  _lookup_districts( $curl, $googleApiKey,
                    "$street $city $state $zip", $state );
            }
            push @row, $delegate, $senate, $leagueId;
        }

        push @row, ( $joinDate ? $joinDate : '' );

        print join( '', map { "$_," } @row ), "\n";
    }

    close $fh;
}

# Percent-encode a string for use as a URL query value (everything except
# the RFC 3986 unreserved characters is escaped, byte by byte).
sub _percent_encode {
    my ($text) = @_;
    $text = '' unless defined $text;
    utf8::encode($text) if utf8::is_utf8($text);
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf( "%%%02X", ord($1) )/ge;
    return $text;
}

# Pick the state legislative districts out of a "divisions" hash keyed by
# OCD division id.  Returns (delegate_district, senate_district), i.e. the
# sldl number followed by the sldu number; either is '' when absent.
sub _district_columns {
    my ( $divisions, $state ) = @_;

    my ( $delegate, $senate ) = ( '', '' );
    return ( $delegate, $senate ) unless ref $divisions eq 'HASH';

    my $state_lc = lc $state;
    foreach my $division_id ( keys %{$divisions} ) {
        next
          unless $division_id =~
          m{ocd-division/country:us/state:\Q$state_lc\E/sld([ul]):(\d+)};
        if   ( $1 eq 'u' ) { $senate   = $2; }
        else               { $delegate = $2; }
    }
    return ( $delegate, $senate );
}

# Query the Google Civic Information API for one address and return
# (delegate_district, senate_district).  Sleeps before each request to stay
# within the per-minute quota.  An unparsable response produces a warning
# and two empty values instead of aborting the run.
sub _lookup_districts {
    my ( $client, $api_key, $address, $state ) = @_;

    my $queryDistricts =
        "https://www.googleapis.com/civicinfo/v2/divisionsByAddress?"
      . "key=${api_key}&"
      . "address="
      . _percent_encode($address);

    sleep 2;    # limit queries to 30 per minute
    my $res = $client->get($queryDistricts);

    my $content = eval { decode_json( $res->content ) };
    my $error   = $@;
    unless ( ref $content eq 'HASH' ) {
        $error = "unexpected response\n" unless $error;
        warn "WARNING: district lookup failed for '$address': $error";
        return ( '', '' );
    }

    return _district_columns( $content->{'divisions'}, $state );
}
|
|
|
|
# Helper function to parse a CSV line into fields.
# Handles quoted fields and commas within quotes.
#
# Parameter: one line of CSV text (a trailing CR/LF is ignored).
# Returns:   the list of fields.  Quoted fields keep their surrounding
#            quotes and doubled inner quotes; callers strip them.  Trailing
#            empty fields are removed, so a row may come back shorter than
#            the header row -- callers must treat a missing field as empty.
#            An undefined or empty line yields an empty list.
sub parse_csv_line {
    my ($line) = @_;

    return unless defined $line;

    # Strip the line terminator, including the CR of CRLF-terminated files
    $line =~ s/[\r\n]+\z//;

    # A field is either a quoted string (with "" as an escaped quote) or a
    # run of characters containing no quote/comma/newline.  The quoted part
    # is written as an unrolled loop so that an unterminated quote cannot
    # trigger exponential backtracking.
    my @fields =
      $line =~ m/(?:,|\n|^)("[^"]*(?:""[^"]*)*"|[^",\n]*|(?:\n|$))/g;

    # Remove empty fields at the end of the list
    pop @fields while @fields && $fields[-1] eq '';

    return @fields;
}
|
|
|
|
# Skip over invalid and nonexistent email addresses.
#
# Parameter: the email value taken from the roster (may be undef or empty).
# Returns:   1 when the address should be used, 0 when it is missing or
#            matches one of the known placeholder/bogus patterns below.
sub email {
    my ($email) = @_;

    return 0 unless $email;

    # Dots are escaped so that they match a literal "." only.
    my $rejected = qr{
          ^\w{1,2}\@aol\.com                    # 1-2 character AOL names
        | ^\w{1,2}\@(?:em|m)ail\.com            # 1-2 character (e)mail.com names
        | \@lwv\.org                            # national LWV addresses
        | Email                                 # header text in a data row
        | (?:student|prando|ckarr)\@aol\.com    # specific known-bad addresses
    }x;

    return $email =~ $rejected ? 0 : 1;
}
|
|
|
|
# Print the usage text to STDOUT.
#
# Reads the configuration globals $STATE and $localLeagueIDs to build the
# list of accepted League IDs; an unset global is shown as an empty string
# rather than triggering "uninitialized" warnings.
sub help {

    my $state     = defined $STATE          ? $STATE          : '';
    my $local_ids = defined $localLeagueIDs ? $localLeagueIDs : '';

    # Construct the dynamic League ID line
    my $league_id_line =
      "League ID: ${state}000 (members at large) | $local_ids";

    # Replace | with spaces and remove extra spaces
    $league_id_line =~ s/\s*\|\s*/ /g;

    print <<"END_HELP";
LLAW Google Civic Information API Query version 2
Copyright (C) 2025 - by Jonathan Rosenbaum <freesource\@freesoftwarepc.com>
This may be freely redistributed under the terms of the GNU General Public License

Note: Since https://portal.lwv.org came online, two csv files are now available, which should both be processed:
1. Local Leagues (Local Leagues) - which has all the local League members
2. MAL (Roster) - which has all the members at large
Depending on your request, this script will process either of those files, and query the Google Civic Information API for the Delegate and Senate Districts,
generate an information file or email file.

Usage: $0 google '*.csv' (queries Google Civic Api Delegate and Senate District for all LWVWV members)
$0 ${state}000 '*.csv' (show all information for members at large, but do not query Google)
$0 ${state}000 '*.csv' email (only show email addresses for members at large, but do not query Google)
$0 ${state}000 '*.csv' email-csv (only show email addresses in CSV format for members at large, but do not query Google)

1st argument can be one of these types:
google (all members with senate/delegate district query) which prints out this csv data:
'Name,Email,Phone,Address,Delegate District,Senate District,League ID,Join Date'
ALL (all members without senate/delegate district query)
$league_id_line

2nd argument must be the location of the LWVWV roster file, and prints out this csv information:
Name,Email,Phone,Address,Join Date,

3rd argument 'email' will only print out the email addresses, and only works with the ALL or League ID type argument
3rd argument 'email-csv' will print out email addresses in CSV format (email,first_name,last_name), and only works with the ALL or League ID type argument


You will want to send results to a file.
Example: $0 google '*.csv' > 2023-districts

END_HELP

    return;
}
|