#!perl

use Encode; 
use HTML::Entities ();

# You will need to install this perl module:
use XML::Twig;

#   bugfix: 9 July 2009
#   bugfix and small feature update: 15 July 2008
#   Jason Casden
#   casden@gmail.com
#   10 June 2004
#   Jerome Lawrence and Robert E. Lee Theatre Research Institute
#   The Ohio State University
#
#   This script takes XML input and "normalizes" each <unitdate> field to
#   the ISO 8601 format.  It handles both individual dates and date
#   ranges.  Anything listed in sub long_date can be used as part of a range,
#   and the range can be any combination of those date formats.
#   If the script cannot interpret the input date, it will prompt the user to
#   input a normalized form by hand.  The output of the script is in formatted
#   XML.  The output filename is the same as the input, appended with .out .
#   The original dates are kept intact, and the normalized forms are added
#   as a normal="" attribute to the appropriate <unitdate>.
#
#   If you run into any problems, please feel free to email me at 
#   casden@gmail.com

# Use UTF-8 Unicode for default file I/O

use open ':utf8';

select STDOUT;

# Ask whether every original/normalized date pair should be echoed to the
# terminal.  Loops until the user answers "y" or "n" (case-insensitive);
# the lower-cased answer is left in $verbose_mode for date_norm2 to read.
while (1) {
    print "\n\nRun in verbose mode? (y/n): ";
    $verbose_mode = <STDIN>;

    # <STDIN> yields undef once input is exhausted (closed terminal, empty
    # pipe).  Without this check the loop would re-prompt forever.
    die "No answer received on standard input; giving up.\n"
        unless defined $verbose_mode;

    chomp $verbose_mode;
    $verbose_mode = lc($verbose_mode);
    # Remove trailing whitespace
    $verbose_mode =~ s/\s+$//;

    last if $verbose_mode =~ /^[yn]$/;

    print "y or n, please\n";
}

# Ask whether existing normal="" attributes should be overwritten.  Loops
# until the user answers "y" or "n" (case-insensitive) and records the
# decision in $replace_norms (1 = overwrite, 0 = keep), which cont_break
# and date_norm consult.
while (1) {
    print "\n\nReplace all previous 'normal' attributes in <unitdate> fields? (y/n): ";
    $norm_in = <STDIN>;

    # <STDIN> yields undef once input is exhausted; bail out instead of
    # re-prompting forever.
    die "No answer received on standard input; giving up.\n"
        unless defined $norm_in;

    chomp $norm_in;
    $norm_in = lc($norm_in);
    # Remove trailing whitespace
    $norm_in =~ s/\s+$//;

    if ($norm_in =~ /^[yn]$/) {
        # String comparison is required here: with the numeric "==" both
        # 'y' and 'n' evaluate to 0, so the test was always true and "n"
        # was silently treated as "y".
        $replace_norms = ($norm_in eq 'y') ? 1 : 0;
        last;
    }

    print "y or n, please\n";
}


# Number of dates echoed since the last "Press Enter" pause (verbose mode).
$verbose_counter = 0;

print "\n\nPlease enter the filename of the XML file with dates to be normalized\n> ";
$xmlfile = <STDIN>;
die "No filename received on standard input; giving up.\n"
    unless defined $xmlfile;
chomp $xmlfile;

# Probe the input file before doing any work.  The three-argument form of
# open is used so that a filename beginning with ">" or ending with "|"
# is treated as a plain name, never as a mode or a command.
if (!(open XMLDATA, '<', $xmlfile)) # see if the file can be opened
{
    die "The input file " . $xmlfile . " cannot be opened for reading.\n";
}
close XMLDATA;

$xml_out = $xmlfile . '.out';

# see if the file.out can be written to
if (!(open XMLOUTPUT, ">:utf8", $xml_out))
{
    die "The output file " . $xml_out . " can't be opened.\n";
}

# Running total of <unitdate> elements given a normal="" attribute;
# incremented by cont_break and date_norm.
$date_count = 0;

#
# PASS 1: normalize the collection-level <unitdate> so that its value is
# available (in $daterange) as the fallback for undated items, then write
# the document to the .out file.
#
select STDOUT;
print "\n         ***************************************\n";
print   "         * Finding the collection's date range *\n";
print   "         ***************************************\n";

my $range_twig = XML::Twig->new(
    TwigHandlers  => {'ead/archdesc/did/unitdate' => \&cont_break},
    NoExpand      => 1,
    ParseParamEnt => 0,
);

$range_twig->parsefile($xmlfile);

select XMLOUTPUT;
$range_twig->set_pretty_print('record');
$range_twig->print;           # re-output the XML, with the normalized date range
select STDOUT;

# Pass 2 reads this file back in, so make sure it was written completely.
close XMLOUTPUT
    or die "The output file " . $xml_out . " could not be written.\n";

#
# PASS 2: re-read the .out file, normalize every <unitdate>, and overwrite
# the .out file with the result.
#
print "\n         ***************************************\n";
print   '         *  Normalizing the <unitdate> fields  *' . "\n";
print   "         ***************************************\n\n";

my $norm_twig = XML::Twig->new(
    TwigHandlers  => {'unitdate' => \&date_norm},
    NoExpand      => 1,
    ParseParamEnt => 0,
);

$norm_twig->parsefile($xml_out);

# The file is only reopened (and therefore truncated) after it has been
# parsed completely.
if (!(open XMLOUTPUT, ">:utf8", $xml_out))
{
    die "The output file " . $xml_out . " can't be opened.\n";
}
select XMLOUTPUT;
$norm_twig->set_pretty_print('record');
$norm_twig->print;           # re-output the XML, with the normalized dates
select STDOUT;

# Buffered write errors only surface at close time.
close XMLOUTPUT
    or die "The output file " . $xml_out . " could not be written.\n";

print "\n\nProcessed " . $date_count . ' <unitdate> fields.' . "\n\n";

#
# THE END OF THE PROGRAM
#




#
# THE START OF THE SUBPROCEDURES
#


#
#
#
# cont_break SUBPROCEDURE
# used to find date ranges
#
#
#



# Run this whenever I encounter a <ead>/<archdesc>/<did>/<unitdate>
# the date range of the document should be in this field (after it's normalized)

sub cont_break
{
    # Twig handler for the collection-level <unitdate>.
    #   $twig_ref - the XML::Twig object (unused)
    #   $unitdate - the <unitdate> element that triggered the handler
    # Leaves the element's normal="" value in the global $daterange.
    my ($twig_ref, $unitdate) = @_;

    select STDOUT;

    # Normalize when the user asked to replace everything, when there is no
    # usable normal attribute yet, or when it holds the '##' placeholder
    # (added for Michele Combs).
    my $existing = $unitdate->att('normal');
    if (($replace_norms == 1) || !$existing || ($existing eq '##'))
    {
        # date_norm2 leaves its result in the global $norm_date
        date_norm2($unitdate->text(), $daterange);
        $unitdate->set_att(normal => $norm_date);
        $date_count++;
    }

    # Whatever the attribute holds now is the collection's date range.
    $daterange = $unitdate->att('normal');
}



#
#
#
# date_norm2 SUBPROCEDURE
# called by date_norm and cont_break to normalize dates
#
#
#


# date_norm2($human_date, $daterange)
#
# Normalizes the text of one <unitdate> to ISO 8601.
#
#   $human_date - the date as written in the finding aid
#   $daterange  - the collection's normalized date range; used as the value
#                 for "undated", "n.d." and empty dates, and offered as the
#                 default when the user is prompted
#
# The result is stored in the package variable $norm_date (which is what
# cont_break and date_norm read) and is also returned.  If the date cannot
# be interpreted, the user is prompted on STDIN for the normalized form.
# In verbose mode ($verbose_mode eq 'y') the original and normalized dates
# are echoed, pausing for Enter after every five dates.
sub date_norm2
{
    my ($human_date, $daterange) = @_;

    # State shared with the rest of the script.
    our ($norm_date, $verbose_mode, $verbose_counter);

    $human_date = '' unless defined $human_date;

    # Preserve the original date to print to the user in case we can't figure it out
    my $orig_date = $human_date;

    # strip off ALL brackets
    $human_date =~ s/[\[\]]//g;

    # Replace a trailing "ongoing" with 9999.  The look-behind requires a
    # non-word character in front of the word but does not consume it, so
    # the range separator of "1950-[ongoing]" survives ("1950-9999").
    $human_date =~ s/(?<=\W)ongoing\s*$/9999/;

    # strip off the possible initial Date: as in "Date: ca. 1935"
    if ($human_date =~ /^\s*[Dd][Aa][Tt][Ee]:(.*)$/)
    {
        $human_date = $1;
    }

    # Remove a trailing period, then a trailing comma, then a trailing
    # period again (so that "1950.," is cleaned up completely).
    for my $trailing (qr/(.*)\.\s*$/, qr/(.*)\,\s*$/, qr/(.*)\.\s*$/)
    {
        $human_date = $1 if $human_date =~ $trailing;
    }

    # remove a leading period
    $human_date =~ s/^\s*\.//;

    # remove a leading comma
    $human_date =~ s/^\s*\,//;

    # Trash question marks that follow a year ("1920?-1950?")
    $human_date =~ s/(\d{4})\?/$1/g;

    # Most of the date formats are taken care of in long_date().  The
    # formats tested here first either don't play well with the generic
    # range splitting or cannot be part of a range at all.  The order of
    # the tests matters.

    # 1950-08-17
    if ($human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/)
    {
        # zero-pad month and day: ISO 8601 wants YYYY-MM-DD
        $norm_date = sprintf('%s-%02d-%02d', $1, $2, $3);
    }
    # 1950-08
    elsif ($human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/)
    {
        $norm_date = sprintf('%s-%02d', $1, $2);
    }
    # 1950-08-17 - ??
    elsif ($human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/)
    {
        my ($year, $month, $day, $right_half) = ($1, $2, $3, $4);
        $norm_date = sprintf('%s-%02d-%02d', $year, $month, $day)
                   . '/' . long_date($right_half);
    }
    # 1950-08 - ??
    elsif ($human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/)
    {
        my ($year, $month, $right_half) = ($1, $2, $3);
        $norm_date = sprintf('%s-%02d', $year, $month)
                   . '/' . long_date($right_half);
    }
    # 19th Century
    elsif ($human_date =~ /(\d{1,2})([Tt][Hh]|[Rr][Dd]|[Nn][Dd]|[Ss][Tt])\s*[Cc]ent/)
    {
        $norm_date = (($1 * 100) - 99) . '/' . ($1 * 100);
    }
    # 1920
    elsif ($human_date =~ /^\s*(\d{4})\s*$/)
    {
        $norm_date = $1;
    }
    # 1920-1952
    # 1920's- 1952
    elsif ($human_date =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})\s*$/)
    {
        $norm_date = $1 . '/' . $2;
    }
    # 1920s-1950's
    # 1920 - 1950's
    # translate into 1920/1959
    elsif ($human_date =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})'{0,1}s\s*$/)
    {
        $norm_date = $1 . '/' . ($2 + 9);
    }
    # ca. 1945 (five years either side)
    elsif ($human_date =~ /^\s*[Cc][^\d]*(\d{4})/)
    {
        $norm_date = ($1 - 5) . '/' . ($1 + 5);
    }
    # 1950s, 1950's
    elsif ($human_date =~ /^\s*(\d{4})'{0,1}s\s*$/)
    {
        $norm_date = $1 . '/' . ($1 + 9);
    }
    # undated, n.d. and empty fields all get the collection's date range
    elsif (   $human_date =~ /^\s*[Uu][Nn][Dd][Aa][Tt][Ee][Dd]\s*$/
           || $human_date =~ /^\s*[Nn]\.[Dd]\.\s*$/
           || $human_date =~ /^\s*$/)
    {
        $norm_date = $daterange;
    }
    # Check not only for January 17, 1950, etc here, but also ranges, like
    # January 1950 - January 17, 1951
    # or 12-01-1972 - 12-12-1986
    # but NOT 12-01-72 - 12-12-1986 (no two-digit year on the left side)
    elsif ($human_date =~ /\d{4}\s*[-\/]/)
    {
        $norm_date = _normalize_date_range($human_date);
    }
    # January - March 1979
    elsif ($human_date =~ /^([^a-zA-Z]*[a-zA-Z]{3,9}.*)([-\/].*)([a-zA-Z]{3,9}.*)(\d{4})(.*)$/)
    {
        # rebuild date string into January 1979 - March 1979
        my $rebuilt = $1 . ' ' . ' ' . $4 . $2 . $3 . $4 . $5;
        $norm_date = _normalize_date_range($rebuilt);
    }
    # January 17 - 28 1979
    elsif ($human_date =~ /^([^a-zA-Z]*)([a-zA-Z]{3,9})(.*)([-\/]\s*)(\d{1,2}.*)(\d{4})(.*)$/)
    {
        # rebuild date string into January 17 1979 - January 28 1979
        my $rebuilt = $1 . $2 . $3 . ' ' . $6 . ' ' . $4 . ' ' . $2 . ' ' . $5 . $6 . $7;
        $norm_date = _normalize_date_range($rebuilt);
    }
    else
    {
        # it's just a single date unit (not a date range)
        $norm_date = long_date($human_date);
    }

    # If we couldn't identify the date (or one half of a range), ask the
    # user.  long_date() signals failure with the marker "CRAP".  This
    # check covers every branch above so that a marker is never written
    # to the XML unnoticed.
    if (defined $norm_date && $norm_date =~ /CRAP/)
    {
        select STDOUT;
        print "\n-------------------------\n";
        print "unrecognized date format:\n\n";
        print "\"" . $orig_date . "\"\n\n";
        if (defined $daterange && length($daterange) > 0)
        {
            print "The date range for this collection is: \"" . $daterange . "\"\n";
        }
        else
        {
            print "The date range for this collection has not been determined\n";
        }

        print "\nPlease input the ISO 8601 normalized form of this date,\n";
        print " or just hit ENTER to accept the collection's date range\n";
        print " as the normalized date:\n> ";
        $norm_date = <STDIN>;
        chomp $norm_date if defined $norm_date;
        # empty answer (or end of input): fall back to the collection range
        if (!defined $norm_date || length($norm_date) < 1)
        {
            $norm_date = $daterange;
        }
        print "-------------------------\n\n";
    }

    select STDOUT;

    if (defined $verbose_mode && $verbose_mode eq 'y')
    {
        print "\nORIGINAL DATE: $orig_date\n";
        print "NORMALIZED DATE: $norm_date\n";

        # pause after every fifth date so the output can be read
        $verbose_counter++;
        if ($verbose_counter > 4)
        {
            print "\n\tPress \"Enter\" to continue> ";
            my $verb_cont = <STDIN>;
            $verbose_counter = 0;
        }
    }

    return $norm_date;
}

# _normalize_date_range($range)
#
# Private helper for date_norm2.  Splits $range at the first four-digit
# year that is followed by "-" or "/": everything up to and including that
# year is the left half, everything after the separator is the right half.
# Each half is normalized with long_date() and the two are joined with "/".
# Returns "CRAP" if no such year/separator pair is present.
sub _normalize_date_range
{
    my ($range) = @_;

    return 'CRAP' unless $range =~ /(\d{4})\s*[-\/](.*)/;
    my ($split_year, $right_half) = ($1, $2);

    # $-[0] is the offset at which the match (i.e. the year) starts
    my $left_half = substr($range, 0, $-[0]) . $split_year;

    return long_date($left_half) . '/' . long_date($right_half);
}


#
#
#
# date_norm SUBPROCEDURE
# calls date_norm2 to normalize dates
# I can't remember why I did it this way.
#

sub date_norm{

    # Twig handler run for every <unitdate>.
    #   $twig_ref - the XML::Twig object (unused)
    #   $unitdate - the <unitdate> element that triggered the handler
    my ($twig_ref, $unitdate) = @_;

    # Normalize when the user asked to replace everything, when there is no
    # usable normal attribute yet, or when it holds the '##' placeholder
    # (support added for Michele Combs).
    my $existing = $unitdate->att('normal');
    my $must_normalize = ($replace_norms == 1) || !$existing || ($existing eq '##');

    if ($must_normalize) {
        # date_norm2 leaves its result in the global $norm_date
        date_norm2($unitdate->text(), $daterange);

        $norm_date = HTML::Entities::encode($norm_date);
        $unitdate->set_att(normal => $norm_date);

        select STDOUT;
        $date_count++;
    }
}


#
#
#
# long_date SUBPROCEDURE
# Called by date_norm2 to handle most date formats, particularly
# the ones used in ranges.
#


# long_date($ld_human_date)
#
# Normalizes a single date (never a range) to ISO 8601 and returns it.
# Returns the marker string "CRAP" when the date is not recognized; the
# caller (date_norm2) looks for that marker to decide whether to prompt.
#
# Recognized forms, tested in this order:
#   1950-08-17           -> 1950-08-17   (any single non-digit separator)
#   1950-08              -> 1950-08
#   8-17-1950, 8/17/1950 -> 1950-08-17   (month first)
#   January 17, 1970     -> 1970-01-17
#   17 January, 1970     -> 1970-01-17   (also "1970 January 17")
#   January 1970         -> 1970-01
#   1970                 -> 1970
# Month names are matched on their first three letters, in any letter
# case.  Months and days are zero-padded to two digits.
sub long_date
{
    my ($ld_human_date) = @_;

    my %month_number_of = (
        jan => '01', feb => '02', mar => '03', apr => '04',
        may => '05', jun => '06', jul => '07', aug => '08',
        sep => '09', oct => '10', nov => '11', dec => '12',
    );
    my $month_re = qr/(?i:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/;

    # Convert "1950 August 17" to "17 August 1950"
    if ($ld_human_date =~ /^\s*(\d{4})\s*([A-Za-z]+)[^\d]+(\d{1,2})\s*$/)
    {
        $ld_human_date = "$3 $2 $1";
    }

    # 1950-08-17
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/)
    {
        return sprintf('%s-%02d-%02d', $1, $2, $3);
    }

    # 1950-08
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/)
    {
        return sprintf('%s-%02d', $1, $2);
    }

    # 8-17-1950 or 8/17/1950
    if ($ld_human_date =~ /^\s*(\d{1,2})[^\d]+(\d{1,2})[^\d]+(\d{4})(.*)\s*$/)
    {
        return sprintf('%s-%02d-%02d', $3, $1, $2);
    }

    # January 17, 1970
    if ($ld_human_date =~ /^\s*($month_re)[^\d]*(\d{1,2})[^\d]+(\d{4})\s*$/)
    {
        return sprintf('%04d-%s-%02d', $3, $month_number_of{lc $1}, $2);
    }

    # 17 January, 1970
    if ($ld_human_date =~ /^\s*(\d{1,2})\s*($month_re)[^\d]+(\d{4})\s*$/)
    {
        return sprintf('%04d-%s-%02d', $3, $month_number_of{lc $2}, $1);
    }

    # January 1970
    if ($ld_human_date =~ /^\s*($month_re)[^\d]*(\d{4})\s*$/)
    {
        return $2 . '-' . $month_number_of{lc $1};
    }

    # 1970 on its own, so that a bare year can be one half of a mixed
    # range such as "January 1950 - 1955"
    if ($ld_human_date =~ /^\s*(\d{4})\s*$/)
    {
        return $1;
    }

    # not recognized
    return "CRAP";
}

