#!/usr/bin/env perl
use warnings;
use strict;

# PORTAL CHANGES - assumes you have roster permission for the State League group
# Since the portal came online, you now need to process two different types of csv files: one for all local Leagues (Local Leagues) and one for the MALs (Roster)
#                       *                       *           *       *       *               *               *               *                   *                                   *
# LEAGUE - First Name	Preferred First Name	Last Name	Email	Phone	Mailing Street	Mailing City	Mailing State	Mailing Postal Code	Mailing Country	League Name	        League ID	    Status	        Original Join Date	Expiration Date	Unique Contact Id	Unique Account Id
#                                                           (diff)  (diff)
# MAL -    First Name	Preferred First Name	Last Name	Phone	Email	Mailing Street	Mailing City	Mailing State	Mailing Postal Code	Mailing Country	Original Join Date	Expiration Date	Last Login Date	Unique Contact Id	Unique Account Id

# Required                  local Leagues              MAL
# Preferred First Name      [1]                        [1]
# Last Name                 [2]                        [2]
# Email                     [3]                        [4]
# Phone                     [4]                        [3]
# Mailing Street            [5]                        [5]
# Mailing City              [6]                        [6]
# Mailing State             [7]                        [7]
# Mailing Postal Code       [8]                        [8]
# League ID                 [11]                       Assumed
# Status                    [12] not used              not used
# Original Join Date        [13]                       [10]

# Google Civic Information API
# Quota: Queries per minute
# Current limit: 151
#
# Use sleep to limit queries to 30 per minute
# Currently there are 187 members in the whole state

# commandline options:  type roster-file email
# type:
#             google (all members with senate/delegate district query)
#             ALL (all members without senate/delegate district query)
#             League ID:  WV000 (members at large)  WV102 (Huntington) WV103 (Morgantown-Monongalia) WV107 (Wood County) WV112 (Jefferson)
# email (optional third arg):
#             email  (emails for specified type ALL (or google) or League ID)

# cpan App::cpanminus
# Now install any module you can find.
# cpanm Module::Name
use lib qw(/home/jr/perl5/lib/perl5);    # adjust perl lib as required
use WWW::Curl::Simple;
use File::Glob ':glob';
use JSON;
use Data::Dumper;
use vars qw($key $STATE $stateLeagueID $localLeagueIDs);
require "./env";

# Configuration - key for LWVWV
# Put it in a file called env with one line: $key = "thekeyfromgoogle"
# The same file is also expected to set $STATE, $stateLeagueID and
# $localLeagueIDs, which are read below and in help().
my $googleApiKey = $key;

# First argument: report type (google, ALL or a League ID).
my $searchType = $ARGV[0];

if ( !$searchType ) {
    help();
    exit;
}

# NOTE(review): this match is unanchored, so any argument that merely contains
# one of the accepted values is let through; tightening it depends on the
# exact format of $localLeagueIDs in env - confirm before anchoring.
if ( $searchType !~ /google|ALL|$stateLeagueID|$localLeagueIDs/ ) {
    print "WRONG FIRST ARGUMENT\n";
    help();
    print "WRONG FIRST ARGUMENT\n";
    exit;
}

# Second argument: roster file or wildcard pattern. Without this check a
# missing argument reached bsd_glob() as undef and the script ended silently.
if ( !defined $ARGV[1] || $ARGV[1] eq '' ) {
    print "MISSING ROSTER FILE ARGUMENT\n";
    help();
    print "MISSING ROSTER FILE ARGUMENT\n";
    exit 1;
}

my @files = bsd_glob( $ARGV[1] );    # Expand wildcard patterns (e.g., *.csv)
if ( !@files ) {
    warn "No roster files match '$ARGV[1]'\n";
    exit 1;
}

# Optional third argument: any true value switches to email-only output.
my $emailArg;
if ( $ARGV[2] ) {
    $emailArg = $ARGV[2];
}

# Process each roster file .. hopefully columns remain the same,
# but the column indexes below can be adjusted as required.
#
# Output per file:
#   - "Header:" and "Processing file:" diagnostic lines,
#   - in email mode (third argument given): one "First Last <email>" line per
#     member whose address passes email(),
#   - otherwise a CSV header followed by one row per member. With the
#     "google" type every row carries Delegate District, Senate District and
#     League ID columns (empty when unknown) so rows always line up with the
#     header; the other types print Name,Email,Phone,Address,Join Date.

$| = 1;    # autoflush the selected output handle (STDOUT)

# Known incorrect street addresses in the roster and their corrections.
my %streetCorrection = (
    "309 2nd St Altizer"  => "309 2nd St",
    "!87 Gallaher Street" => "187 Gallaher Street",
);

foreach my $file (@files) {

    open my $fh, "<", $file or do {
        warn "Cannot open roster file '$file': $!\n";
        next;
    };

    my $curl = WWW::Curl::Simple->new();

    # The first line is the column header; it tells the two file types apart.
    my $header = <$fh>;
    if ( !defined $header ) {
        warn "Roster file '$file' is empty, skipping\n";
        close $fh;
        next;
    }
    print "Header: $header";    # Output the header for reference

    # Only the local Leagues export has a "League ID" column.
    my $localLeague = $header =~ /League ID/ ? 1 : 0;
    if ($localLeague) {
        print "Processing file: $file ... League ID found\n";
    }
    else {
        print "Processing file: $file ... No League ID found, must be MAL\n";
    }

    # The CSV header must describe the rows actually printed for this type;
    # email mode prints no CSV at all.
    if ( !$emailArg ) {
        my $csvHeader =
          $searchType eq "google"
          ? "Name,Email,Phone,Address,Delegate District,Senate District,League ID,Join Date\n"
          : "Name,Email,Phone,Address,Join Date\n";
        print $csvHeader;
    }

    while ( my $line = <$fh> ) {

        next if $line !~ /\S/;    # ignore blank lines

# There may be commas between quotes, so a plain split on , is not enough.
# pulled this regexp from https://stackoverflow.com/questions/18144431/regex-to-split-a-csv (solution 45)
        my @fields =
          $line =~ m/(?:,|\n|^)("(?:(?:"")*[^"]*)*"|[^",\n]*|(?:\n|$))/g;

        # Fetch one column with its quotes removed; a column missing from a
        # short row reads as the empty string instead of undef.
        my $column = sub {
            my ($index) = @_;
            my $value = $fields[$index];
            $value = '' if !defined $value;
            $value =~ s/"//g;
            return $value;
        };

        my ( $leagueId, $phone, $email, $joinDate );
        if ($localLeague) {

            # Local League file layout
            $leagueId = $column->(11);    # League ID
            $email    = $column->(3);     # Email
            $phone    = $column->(4);     # Phone
            $joinDate = $column->(13);    # Join Date
        }
        else {
            # Members At Large (MAL) file layout: Phone and Email are
            # swapped and there is no League ID column, so the state
            # League ID is assumed.
            $leagueId = $stateLeagueID;
            $phone    = $column->(3);     # Phone
            $email    = $column->(4);     # Email
            $joinDate = $column->(10);    # Join Date
        }
        my $firstName = $column->(0);     # First Name
        my $lastName  = $column->(2);     # Last Name
        my $street    = $column->(5);     # Street
        my $city      = $column->(6);     # City
        my $state     = $column->(7);     # State
        my $zip       = $column->(8);     # Zip

        # Skip a repeated header row and members of other states' Leagues.
        next if $street eq "Mailing Street";
        next if $leagueId !~ /^$STATE/;

        # A League ID type restricts output to that League.
        if ( $searchType ne "google" && $searchType ne "ALL" ) {
            next if $leagueId ne $searchType;
        }

        # Email mode: only members with a usable address are listed.
        if ($emailArg) {
            print "$firstName $lastName <$email>\n" if email($email);
            next;
        }

        # no need for commas now
        $street =~ s/,//g;

        my $csv = "$firstName $lastName,";
        $csv .= email($email) ? "$email," : ",";
        $csv .= $phone        ? "$phone," : ",";

        if ($street) {
            $street = $streetCorrection{$street}
              if exists $streetCorrection{$street};
            $csv .= "$street $city $state $zip,";
        }
        else {
            $csv .= "$city $state $zip,";
        }

        if ( $searchType eq "google" ) {

            # Always emit both district columns and the League ID, even when
            # there is no street to look up, so every row has the same shape.
            my ( $delegateDistrict, $senateDistrict ) = ( '', '' );
            if ($street) {
                ( $delegateDistrict, $senateDistrict ) =
                  _lookup_districts( $curl, $googleApiKey,
                    "$street $city $state $zip", $state );
            }
            $csv .= "$delegateDistrict,$senateDistrict,$leagueId,";
        }

        $csv .= $joinDate ? "$joinDate," : ",";

        print $csv . "\n";
    }
    close $fh;

}

# Look up the state legislative districts of one address with the Google
# Civic Information API (divisionsByAddress).
#
# Arguments: the WWW::Curl::Simple object, the API key, the plain-text
# address and the member's state abbreviation.
# Returns ( $delegateDistrict, $senateDistrict ); each is '' when it could
# not be determined (other state, failed request, no matching division).
sub _lookup_districts {
    my ( $curl, $apiKey, $address, $state ) = @_;

    # Districts are only reported for members living in $STATE, so do not
    # spend a query (and two seconds) on anybody else.
    return ( '', '' ) if $state ne $STATE;

    # Percent-encode everything outside the RFC 3986 unreserved set so that
    # spaces, '#', '&' and similar characters cannot corrupt the query string.
    ( my $encodedAddress = $address ) =~
      s/([^A-Za-z0-9\-._~])/sprintf( "%%%02X", ord($1) )/ge;

    my $queryDistricts =
        "https://www.googleapis.com/civicinfo/v2/divisionsByAddress?"
      . "key=${apiKey}&"
      . "address=${encodedAddress}";

    sleep 2;    # limits queries to 30 per minute

    # A network failure or a non-JSON reply must not abort the whole run.
    my $content = eval {
        my $res = $curl->get($queryDistricts);
        decode_json( $res->content );
    };
    if ( !$content ) {
        warn "District lookup failed for '$address': $@\n";
        return ( '', '' );
    }

    my $divisions = ref $content eq 'HASH' ? $content->{'divisions'} : undef;
    return ( '', '' ) if ref $divisions ne 'HASH';

    # sldu = upper chamber (Senate), sldl = lower chamber (Delegates). The
    # values are collected by name because hash key order is arbitrary.
    my $lowercase_state = lc $state;
    my ( $delegateDistrict, $senateDistrict ) = ( '', '' );
    foreach my $division ( sort keys %{$divisions} ) {
        next
          if $division !~
m{ocd-division/country:us/state:\Q$lowercase_state\E/sld([ul]):(\d+)};
        if ( $1 eq 'u' ) {
            $senateDistrict = $2;
        }
        else {
            $delegateDistrict = $2;
        }
    }

    return ( $delegateDistrict, $senateDistrict );
}

# Skip over invalid and nonexistent email addresses.
#
# Takes one address and returns 1 when it should be used, 0 when it is
# empty/undefined or matches one of the rejected patterns below.
sub email {
    my ($email) = @_;

    return 0 if !$email;

    # Dots are escaped so they only match a literal '.', and the column
    # heading is matched as a whole value rather than as a substring, so
    # real addresses that merely contain "Email" are no longer dropped.
    my $rejected = qr{
          ^\w{1,2}\@aol\.com                    # 1-2 character mailbox at aol.com
        | ^\w{1,2}\@(?:em|m)ail\.com            # 1-2 character mailbox at email.com or mail.com
        | \@lwv\.org                            # any lwv.org address
        | \A Email \z                           # the column heading itself
        | (?:student|prando|ckarr)\@aol\.com    # specific known-bad mailboxes
    }x;

    return $email =~ $rejected ? 0 : 1;
}

sub help {

    # Build the League ID line from the configured state and local League
    # IDs, then turn every | separator (with any whitespace around it) into
    # a single space.
    my $league_id_line =
      "League ID: ${STATE}000 (members at large) | " . $localLeagueIDs;
    $league_id_line =~ s/\s*\|\s*/ /g;

    # One entry per output line; whitespace-only and trailing-space entries
    # are kept so the printed text matches the established help output.
    my @help_lines = (
        "LLAW Google Civic Information API Query version 2",
        "Copyright (C) 2025 - by Jonathan Rosenbaum <freesource\@freesoftwarepc.com>",
        "This may be freely redistributed under the terms of the GNU General Public License",
        "",
        "Note:  Since https://portal.lwv.org came online, two csv files are now available, which should both be processed:",
        "    1. Local Leagues (Local Leagues) - which has all the local League members",
        "    2. MAL (Roster) - which has all the members at large",
        "Depending on your request, this script will process either of those files, and query the Google Civic Information API for the Delegate and Senate Districts,",
        "generate an information file or email file.",
        "",
        "Usage:  $0 google '*.csv'  (queries Google Civic Api Delegate and Senate District for all LWVWV members)",
        "        $0 ${STATE}000 '*.csv' (show all information for members at large, but do not query Google)",
        "        $0 ${STATE}000 '*.csv' email (only show email addresses for members at large, but do not query Google)",
        "",
        "1st argument can be one of these types:",
        "    google (all members with senate/delegate district query) which prints out this csv data:",
        "        'Name,Email,Phone,Address,Delegate District,Senate District,League ID,Join Date'",
        "    ALL (all members without senate/delegate district query)",
        "    $league_id_line",
        "",
        "2nd argument must be the location of the LWVWV roster file, and prints out this csv information:",
        "    Name,Email,Phone,Address,Join Date,",
        "",
        "3rd argument 'email' will only print out the email addresses, and only works with the ALL or League ID type argument",
        "    ",
        "",
        "You will want to send results to a file.",
        "Example:  $0 google '*.csv' > 2023-districts    ",
        "",
    );

    print join( "\n", @help_lines ) . "\n";

}