#!perl
use Encode;
use HTML::Entities ();
# You will need to install this perl module:
use XML::Twig;
# bugfix and small feature update: 15 July 2008
# Jason Casden
# casden@gmail.com
# 10 June 2004
# Jerome Lawrence and Robert E. Lee Theatre Research Institute
# The Ohio State University
#
# This script takes XML input and "normalizes" each <unitdate> field to
# the ISO 8601 format. It handles both individual dates and date
# ranges. Anything listed in sub long_date can be used as part of a range,
# and the range can be any combination of those date formats.
# If the script cannot interpret the input date, it will prompt the user to
# input a normalized form by hand. The output of the script is formatted
# XML. The output filename is the same as the input, with .out appended.
# The original dates are kept intact, and the normalized forms are added
# as a normal="" attribute to the appropriate <unitdate> element.
#
# If you run into any problems, please feel free to email me at
# casden@email.unc.edu
# Use UTF-8 Unicode for default file I/O
use open ':utf8';
#
# MAIN PROGRAM
#
# 1. Ask the user for the run options and the input filename.
# 2. Pass one: parse the input with cont_break attached to
#    ead/archdesc/did/unitdate and write the result to "<input>.out".
# 3. Pass two: parse "<input>.out" with date_norm attached to every
#    unitdate and overwrite "<input>.out" with the result.
#
# The variables below are package globals on purpose: the subroutines
# further down read and update them directly.
our ($verbose_mode, $replace_norms, $verbose_counter, $date_count);

select STDOUT;

# Ask a yes/no question on STDOUT until the user answers "y" or "n"
# (case-insensitive, trailing whitespace ignored). Returns 'y' or 'n'.
# Dies if STDIN is exhausted, instead of re-prompting forever.
sub _ask_yes_no
{
    my ($question) = @_;
    while (1) {
        print $question;
        my $answer = <STDIN>;
        die "\nNo more input available on STDIN.\n" unless defined $answer;
        chomp $answer;
        $answer = lc $answer;
        # Remove trailing whitespace
        $answer =~ s/\s*$//;
        return $answer if $answer =~ /^[yn]$/;
        print "y or n, please\n";
    }
}

$verbose_mode = _ask_yes_no("\n\nRun in verbose mode? (y/n): ");

# String comparison is required here: a numeric == on 'y'/'n' compares
# 0 with 0 and is therefore always true.
$replace_norms =
    (_ask_yes_no("\n\nReplace all previous 'normal' attributes in fields? (y/n): ") eq 'y')
    ? 1
    : 0;

$verbose_counter = 0;

print "\n\nPlease enter the filename of the XML file with dates to be normalized\n> ";
$xmlfile = <STDIN>;
die "\nNo more input available on STDIN.\n" unless defined $xmlfile;
chomp $xmlfile;

# see if the file can be opened
open(my $probe_fh, '<', $xmlfile)
    or die "The input file " . $xmlfile . " cannot be opened for reading.\n";
close $probe_fh;

$xml_out = $xmlfile . '.out';

# see if the file.out can be written to
open(my $out_fh, '>:utf8', $xml_out)
    or die "The output file " . $xml_out . " can't be opened.\n";

$date_count = 0;

# NOTE(review): the banner texts below are reproduced exactly as found; the
# second one is narrower than its frame, so a tag name may have been lost
# from it at some point -- confirm against the upstream script.
print "\n ***************************************\n";
print " * Finding the collection's date range *\n";
print " ***************************************\n";

# Pass one: only the collection-level unitdate is handled.
my $range_twig = XML::Twig->new(
    TwigHandlers  => { 'ead/archdesc/did/unitdate' => \&cont_break },
    NoExpand      => 1,
    ParseParamEnt => 0,
);
$range_twig->parsefile($xmlfile);

# The handlers select STDOUT while they run, so the output handle is
# selected only for the duration of the print.
select $out_fh;
$range_twig->set_pretty_print('record');
$range_twig->print; # re-output the XML, with the normalized dates
select STDOUT;
close $out_fh
    or die "The output file " . $xml_out . " could not be written.\n";

# now parse the unitdates
print "\n ***************************************\n";
print ' * Normalizing the fields *' . "\n";
print " ***************************************\n\n";

# Pass two: every unitdate in the intermediate file.
my $date_twig = XML::Twig->new(
    TwigHandlers  => { 'unitdate' => \&date_norm },
    NoExpand      => 1,
    ParseParamEnt => 0,
);
$date_twig->parsefile($xml_out);

# The intermediate file has been fully parsed by now, so it can be
# truncated and rewritten.
open($out_fh, '>:utf8', $xml_out)
    or die "The output file " . $xml_out . " can't be opened.\n";
select $out_fh;
$date_twig->set_pretty_print('record');
$date_twig->print; # re-output the XML, with the normalized dates
select STDOUT;
close $out_fh
    or die "The output file " . $xml_out . " could not be written.\n";

print "\n\nProcessed " . $date_count . ' fields.' . "\n\n";
#
# THE END OF THE PROGRAM
#
#
# THE START OF THE SUBPROCEDURES
#
#
#
#
# cont_break SUBPROCEDURE
# used to find date ranges
#
#
#
# Run this whenever the parser encounters an <ead>/<archdesc>/<did>/<unitdate> element.
# the date range of the document should be in this field (after it's normalized)
# Twig handler for the collection-level unitdate.
#
# Arguments: the twig and the unitdate element (as passed by XML::Twig).
# Normalizes the element's text through date_norm2 when it has no usable
# 'normal' attribute (missing, empty, or the '##' placeholder) or when the
# user asked for existing values to be replaced, then remembers the
# element's 'normal' value in the global $daterange.
sub cont_break
{
    my ($t, $u_date) = @_;
    select STDOUT;

    # has it already been normalized? ('##' is a placeholder, added for
    # Michele Combs, that counts as "not yet")
    my $existing = $u_date->att('normal');
    my $needs_norm = !$existing || $existing eq '##' || $replace_norms == 1;

    if ($needs_norm) {
        # date_norm2 leaves its result in the global $norm_date
        $tempvar = $u_date->text();
        date_norm2($tempvar, $daterange);
        $u_date->set_att('normal', $norm_date);
        $date_count++;
    }

    # the normalized date is the date range
    $daterange = $u_date->att('normal');
}
#
#
#
# date_norm2 SUBPROCEDURE
# called by date_norm to normalize dates
#
#
#
# Normalize one human-readable date (or date range).
#
# Arguments:
#   $human_date - the text of a unitdate element
#   $daterange  - the collection's normalized date range; used for
#                 "undated", "n.d." and empty fields, and offered as the
#                 default when the user has to be asked
#
# The result is stored in the package global $norm_date, which is where the
# callers cont_break and date_norm pick it up; it is also returned.
# If the text cannot be interpreted the user is prompted on STDIN.
# When $verbose_mode is 'y' each conversion is echoed, pausing for ENTER
# after every fifth one ($verbose_counter).
sub date_norm2
{
    my ($human_date, $daterange) = @_;
    our ($norm_date, $verbose_mode, $verbose_counter);

    $human_date = '' unless defined $human_date;

    # Preserve the original date to print to the user in case we can't figure it out
    my $orig_date = $human_date;

    # strip off ALL brackets
    $human_date =~ s/[\[\]]//g;

    # Replace a trailing "ongoing" with 9999. The separator in front of it is
    # kept, so "1950-[ongoing]" becomes "1950-9999" rather than "19509999".
    $human_date =~ s/(\W+)ongoing\s*$/${1}9999/;

    # strip off the possible initial Date: as in "Date: ca. 1935"
    if ($human_date =~ /^\s*date:(.*)$/i) {
        $human_date = $1;
    }

    # remove a trailing period
    if ($human_date =~ /(.*)\.\s*$/) {
        $human_date = $1;
    }
    # remove a trailing comma
    if ($human_date =~ /(.*)\,\s*$/) {
        $human_date = $1;
    }
    # a period uncovered by removing the comma (e.g. "1950.,")
    if ($human_date =~ /(.*)\.\s*$/) {
        $human_date = $1;
    }

    # remove a leading period
    $human_date =~ s/^\s*\.//;
    # remove a leading comma
    $human_date =~ s/^\s*,//;

    # Trash question marks that follow a four-digit year. Two passes, so
    # that a doubled "??" is removed as well.
    for my $pass (1 .. 2) {
        $human_date =~ s/(\d{4})\?/$1/g;
    }

    # Formats that need no help from the range logic are tried first;
    # everything else is treated as a range or a single long date.
    my ($matched, $value) = _simple_date_format($human_date, $daterange);
    $norm_date = $matched ? $value : _range_or_long_date($human_date);

    # if we couldn't identify the date (or one half of a range)
    if (defined $norm_date && $norm_date =~ /CRAP/) {
        $norm_date = _prompt_for_normal_date($orig_date, $daterange);
    }

    select STDOUT;
    if (defined $verbose_mode && $verbose_mode eq 'y') {
        print "\nORIGINAL DATE: $orig_date\n";
        print "NORMALIZED DATE: $norm_date\n";
        $verbose_counter++;
        if ($verbose_counter > 4) {
            print "\n\tPress \"Enter\" to continue> ";
            my $verb_cont = <STDIN>;
            $verbose_counter = 0;
        }
    }

    return $norm_date;
}

# Try the formats that date_norm2 recognizes directly.
# Returns (1, $normalized) on a match and an empty list otherwise.
sub _simple_date_format
{
    my ($date, $daterange) = @_;

    # ranges starting with these two date formats don't play well with
    # others, so they are handled here rather than in long_date
    # 1950-08-17
    if ($date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return (1, sprintf('%04d-%02d-%02d', $1, $2, $3));
    }
    # 1950-08
    if ($date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return (1, sprintf('%04d-%02d', $1, $2));
    }
    # 1950-08-17 - ??
    if ($date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/) {
        my ($year, $month, $day, $rest) = ($1, $2, $3, $4);
        return (1, sprintf('%04d-%02d-%02d', $year, $month, $day) . '/' . long_date($rest));
    }
    # 1950-08 - ??
    if ($date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/) {
        my ($year, $month, $rest) = ($1, $2, $3);
        return (1, sprintf('%04d-%02d', $year, $month) . '/' . long_date($rest));
    }

    # anything listed below cannot be used as part of a date range
    # 19th Century (also 1st, 2nd, 3rd, 21st ...)
    if ($date =~ /(\d{1,2})(?:th|rd|nd|st)\s*cent/i) {
        return (1, (($1 * 100) - 99) . '/' . ($1 * 100));
    }
    # 1920
    if ($date =~ /^\s*(\d{4})\s*$/) {
        return (1, $1);
    }
    # 1920-1952
    # 1920's- 1952
    if ($date =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})\s*$/) {
        return (1, $1 . '/' . $2);
    }
    # 1920s-1950's
    # 1920 - 1950's
    # translate into 1920/1959
    if ($date =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})'{0,1}s\s*$/) {
        return (1, $1 . '/' . ($2 + 9));
    }
    # ca. 1945 -> five years either side
    if ($date =~ /^\s*[Cc][^\d]*(\d{4})/) {
        return (1, ($1 - 5) . '/' . ($1 + 5));
    }
    # 1950s, 1950's
    if ($date =~ /^\s*(\d{4})'{0,1}s\s*$/) {
        return (1, $1 . '/' . ($1 + 9));
    }
    # undated
    if ($date =~ /^\s*undated\s*$/i) {
        return (1, $daterange);
    }
    # n.d. (the final period has normally been stripped by the caller)
    if ($date =~ /^\s*n\.\s*d\.?\s*$/i) {
        return (1, $daterange);
    }
    # empty field
    if ($date =~ /^\s*$/) {
        return (1, $daterange);
    }

    return;
}

# Handle ranges such as "January 1950 - January 17, 1951",
# "January - March 1979" and "January 17 - 28 1979", falling back to a
# single long_date lookup. Returns a string that contains "CRAP" when any
# part could not be understood.
sub _range_or_long_date
{
    my ($date) = @_;
    my $range_source;

    # January 1950 - January 17, 1951
    # or 12-01-1972 - 12-12-1986
    # but NOT 12-01-72 - 12-12-1986 (no two-digit year on the left side)
    if ($date =~ /\d{4}\s*[-\/]/) {
        $range_source = $date;
    }
    # January - March 1979
    elsif ($date =~ /^([^a-zA-Z]*[a-zA-Z]{3,9}.*)([-\/].*)([a-zA-Z]{3,9}.*)(\d{4})(.*)$/) {
        # rebuild date string into January 1979 - March 1979
        $range_source = $1 . ' ' . ' ' . $4 . $2 . $3 . $4 . $5;
    }
    # January 17 - 28 1979
    elsif ($date =~ /^([^a-zA-Z]*)([a-zA-Z]{3,9})(.*)([-\/]\s*)(\d{1,2}.*)(\d{4})(.*)$/) {
        # rebuild date string into January 17 1979 - January 28 1979
        $range_source = $1 . $2 . $3 . ' ' . $6 . ' ' . $4 . ' ' . $2 . ' ' . $5 . $6 . $7;
    }

    # it's just a single date unit (not a date range)
    return long_date($date) unless defined $range_source;

    # Split at the first four-digit year that is followed by "-" or "/":
    # the left half ends with that year, the right half is the rest of
    # that line.
    my ($left, $right) = $range_source =~ /\A(.*?\d{4})\s*[-\/]([^\n]*)/s;
    return 'CRAP' unless defined $left;

    # normalize each half, and put them together with a right slash
    return long_date($left) . '/' . long_date($right);
}

# Show the unrecognized date and ask the user for its normalized form.
# An empty answer (or end of input) selects the collection's date range.
sub _prompt_for_normal_date
{
    my ($orig_date, $daterange) = @_;

    select STDOUT;
    print "\n-------------------------\n";
    print "unrecognized date format:\n\n";
    print "\"" . $orig_date . "\"\n\n";
    if (defined $daterange && length($daterange) > 0) {
        print "The date range for this collection is: \"" . $daterange . "\"\n";
    }
    else {
        print "The date range for this collection has not been determined\n";
    }
    print "\nPlease input the ISO 8601 normalized form of this date,\n";
    print " or just hit ENTER to accept the collection's date range\n";
    print " as the normalized date:\n> ";

    my $answer = <STDIN>;
    $answer = '' unless defined $answer;
    chomp $answer;
    if (length($answer) < 1) {
        $answer = $daterange;
    }
    print "-------------------------\n\n";

    return $answer;
}
#
#
#
# date_norm SUBPROCEDURE
# calls date_norm2 to normalize dates
# I can't remember why I did it this way.
#
# Twig handler for every unitdate element (second pass).
#
# Arguments: the twig and the unitdate element (as passed by XML::Twig).
# When the element has no usable 'normal' attribute (missing, empty, or the
# '##' placeholder added for Michele Combs), or when the user asked for
# existing values to be replaced, the element's text is run through
# date_norm2 and the entity-encoded result is stored as 'normal'.
sub date_norm
{
    my ($t, $un_date) = @_;

    my $current = $un_date->att('normal');
    my $must_normalize = !$current || $current eq '##' || $replace_norms == 1;

    if ($must_normalize) {
        # date_norm2 leaves its result in the global $norm_date
        $temp_human_date = $un_date->text();
        date_norm2($temp_human_date, $daterange);
        $norm_date = HTML::Entities::encode($norm_date);
        $un_date->set_att('normal', $norm_date);
        select STDOUT;
        $date_count++;
    }
}
#
#
#
# long_date SUBPROCEDURE
# Called by date_norm2 to handle most date formats, particularly
# the ones used in ranges.
#
# Normalize a single date written in one of the "long" formats.
#
# Argument: the date text (leading/trailing whitespace is tolerated).
# Returns the ISO 8601 form, with month and day always zero-padded, or the
# string "CRAP" when the text matches none of the formats below:
#
#   1950-08-17, 1950-08          year first, any single separator
#   8-17-1950, 8/17/1950         month, day, year
#   January 17, 1970             month name (first three letters decide)
#   17 January, 1970
#   1950 August 17
#   January 1970
#   1970                         bare year, so it can be half of a range
#
# Month names are matched case-insensitively.
sub long_date
{
    my ($ld_human_date) = @_;
    $ld_human_date = '' unless defined $ld_human_date;

    # Month numbers keyed by the lower-cased first three letters.
    my %month_number = (
        jan => 1, feb => 2,  mar => 3,  apr => 4,
        may => 5, jun => 6,  jul => 7,  aug => 8,
        sep => 9, oct => 10, nov => 11, dec => 12,
    );
    my $month_re = qr/(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i;

    # Convert "1950 August 17" to "17 August 1950"
    if ($ld_human_date =~ /^\s*(\d{4})\s*([A-Za-z]+)[^\d]+(\d{1,2})\s*$/) {
        $ld_human_date = "$3 $2 $1";
    }

    # 1950-08-17
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf('%04d-%02d-%02d', $1, $2, $3);
    }
    # 1950-08
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf('%04d-%02d', $1, $2);
    }
    # 8-17-1950 or 8/17/1950
    if ($ld_human_date =~ /^\s*(\d{1,2})[^\d]+(\d{1,2})[^\d]+(\d{4}).*\s*$/) {
        return sprintf('%04d-%02d-%02d', $3, $1, $2);
    }

    # The month-name forms. Each pattern is anchored at the start, so at
    # most one month can match and the order between months is irrelevant.
    # The braces around the variable name keep the following "[" from being
    # read as an array subscript.
    # January 17, 1970
    if ($ld_human_date =~ /^\s*${month_re}[^\d]*(\d{1,2})[^\d]+(\d{4})\s*$/) {
        return sprintf('%04d-%02d-%02d', $3, $month_number{lc $1}, $2);
    }
    # 17 January, 1970
    if ($ld_human_date =~ /^\s*(\d{1,2})\s*${month_re}[^\d]+(\d{4})\s*$/) {
        return sprintf('%04d-%02d-%02d', $3, $month_number{lc $2}, $1);
    }
    # January 1970
    if ($ld_human_date =~ /^\s*${month_re}[^\d]*(\d{4})\s*$/) {
        return sprintf('%04d-%02d', $2, $month_number{lc $1});
    }

    # 1970
    if ($ld_human_date =~ /^\s*(\d{4})\s*$/) {
        return $1;
    }

    # the caller looks for this marker to know the date was not understood
    return "CRAP";
}