#!perl
use strict;
use warnings;

use Encode;
use HTML::Entities ();
# You will need to install this perl module:
use XML::Twig;

# bugfix and small feature update: 15 July 2008
# Jason Casden
# casden@gmail.com
# 10 June 2004
# Jerome Lawrence and Robert E. Lee Theatre Research Institute
# The Ohio State University
#
# This script takes XML input and "normalizes" each <unitdate> field to
# the ISO 8601 format.  It handles both individual dates and date ranges.
# Anything listed in sub long_date can be used as part of a range, and the
# range can be any combination of those date formats.
# If the script cannot interpret the input date, it will prompt the user to
# input a normalized form by hand.  The output of the script is formatted
# XML.  The output filename is the same as the input, appended with .out .
# The original dates are kept intact, and the normalized forms are added
# as a normal="" attribute to the appropriate <unitdate>.
#
# If you run into any problems, please feel free to email me at
# casden@email.unc.edu

# Use UTF-8 Unicode for default file I/O
use open ':utf8';

#
# SHARED STATE
#
# These stay package globals (as in the original script) because the
# XML::Twig handlers and the driver all read and update them.
our $verbose_mode    = 'n';   # 'y' => echo every original/normalized pair
our $replace_norms   = 0;     # 1   => overwrite existing normal="" attributes
our $verbose_counter = 0;     # verbose reports printed since the last pause
our $date_count      = 0;     # number of unitdate elements normalized
our $daterange       = '';    # normalized date range of the whole collection
our $norm_date;               # result of the most recent date_norm2() call

# Three-letter month prefixes, in calendar order.  A month name is
# recognized by its prefix only, so "Sept." and "September" both work.
my @MONTH_PREFIXES = qw(jan feb mar apr may jun jul aug sep oct nov dec);
my %MONTH_NUMBER_OF;
@MONTH_NUMBER_OF{@MONTH_PREFIXES} = (1 .. 12);
my $MONTH_RE = do {
    my $alternatives = join '|', @MONTH_PREFIXES;
    qr/(?:$alternatives)/i;
};

#
# THE START OF THE SUBPROCEDURES
#

#
# main SUBPROCEDURE
# Interactive driver: asks for the options and the input file, then makes
# two passes over the XML.  Pass 1 normalizes ead/archdesc/did/unitdate to
# learn the collection's date range; pass 2 normalizes every unitdate.
# Dies if a file cannot be opened or if input ends while a question is
# still pending.
#
sub main {
    $verbose_mode = _ask_yes_no("\n\nRun in verbose mode? (y/n): ");

    my $norm_in = _ask_yes_no(
        "\n\nReplace all previous 'normal' attributes in fields? (y/n): ");
    # String comparison: the original used '==', which compares both
    # sides as the number 0 and therefore always enabled replacement.
    $replace_norms = ($norm_in eq 'y') ? 1 : 0;

    $verbose_counter = 0;

    print STDOUT "\n\nPlease enter the filename of the XML file with dates to be normalized\n> ";
    my $xmlfile = <STDIN>;
    die "No filename was given.\n" unless defined $xmlfile;
    chomp $xmlfile;

    # see if the file can be opened (three-arg open, so the name typed by
    # the user is never interpreted as an open mode)
    open my $in_fh, '<', $xmlfile
        or die "The input file " . $xmlfile . " cannot be opened for reading.\n";

    my $xml_out = $xmlfile . '.out';

    # see if the file.out can be written to
    open my $out_fh, '>:utf8', $xml_out
        or die "The output file " . $xml_out . " can't be opened.\n";
    close $in_fh;

    $date_count = 0;

    print STDOUT "\n ***************************************\n";
    print STDOUT " * Finding the collection's date range *\n";
    print STDOUT " ***************************************\n";

    my $range_twig = XML::Twig->new(
        TwigHandlers  => { 'ead/archdesc/did/unitdate' => \&cont_break },
        NoExpand      => 1,
        ParseParamEnt => 0,
    );
    $range_twig->parsefile($xmlfile);
    _print_twig($range_twig, $out_fh);   # re-output the XML
    close $out_fh
        or die "The output file " . $xml_out . " can't be opened.\n";

    # now parse the unitdates
    print STDOUT "\n ***************************************\n";
    print STDOUT ' * Normalizing the fields *' . "\n";
    print STDOUT " ***************************************\n\n";

    my $date_twig = XML::Twig->new(
        TwigHandlers  => { 'unitdate' => \&date_norm },
        NoExpand      => 1,
        ParseParamEnt => 0,
    );
    # The intermediate file is fully parsed before it is reopened for
    # writing, so truncating it below is safe.
    $date_twig->parsefile($xml_out);

    open $out_fh, '>:utf8', $xml_out
        or die "The output file " . $xml_out . " can't be opened.\n";
    _print_twig($date_twig, $out_fh);    # re-output the XML, with the normalized dates
    close $out_fh
        or die "The output file " . $xml_out . " can't be opened.\n";

    print STDOUT "\n\nProcessed " . $date_count . ' fields.' . "\n\n";
    return;
}

#
# _ask_yes_no SUBPROCEDURE
# Prints $prompt until the user answers y or n (any case, trailing
# whitespace ignored) and returns the lower-cased answer.
# Dies on end-of-input instead of re-prompting forever.
#
sub _ask_yes_no {
    my ($prompt) = @_;

    while (1) {
        print STDOUT $prompt;
        my $answer = <STDIN>;
        die "Unexpected end of input while waiting for a y/n answer.\n"
            unless defined $answer;
        chomp $answer;
        $answer = lc $answer;
        $answer =~ s/\s+$//;    # Remove trailing whitespace
        return $answer if $answer =~ /^[yn]$/;
        print STDOUT "y or n, please\n";
    }
}

#
# _print_twig SUBPROCEDURE
# Pretty-prints $twig ('record' style) to $fh, then restores whichever
# filehandle was selected before the call.
#
sub _print_twig {
    my ($twig, $fh) = @_;

    my $previous_fh = select $fh;
    $twig->set_pretty_print('record');
    $twig->print;
    select $previous_fh;
    return;
}

#
# cont_break SUBPROCEDURE
# used to find date ranges
#
# Twig handler for ead/archdesc/did/unitdate.
# The date range of the document should be in this field (after it's
# normalized), so its normal="" value becomes the global $daterange.
#
sub cont_break {
    my ($t, $u_date) = @_;

    # Added '##' placeholder for Michele Combs
    # has it already been normalized?
    if (   !$u_date->att('normal')
        || $u_date->att('normal') eq '##'
        || $replace_norms == 1)
    {
        # it wasn't already normalized, so normalize it
        my $normalized = date_norm2($u_date->text(), $daterange);
        $u_date->set_att('normal', $normalized);
        $date_count++;
    }

    # the normalized date is the date range
    $daterange = $u_date->att('normal');
    $daterange = '' unless defined $daterange;
    return;
}

#
# date_norm2 SUBPROCEDURE
# called by date_norm and cont_break to normalize dates
#
# Parameters:
#   $human_date       - the date text as written in the finding aid
#   $collection_range - normalized range of the whole collection; used for
#                       "undated", "n.d." and empty fields, and offered as
#                       the default when the user is prompted
# Returns the normalized date.  For backward compatibility the result is
# also stored in the global $norm_date.
# Side effects: may prompt on STDOUT/STDIN when the date is not understood,
# and prints a report when $verbose_mode is 'y'.
#
sub date_norm2 {
    my ($human_date, $collection_range) = @_;
    $human_date       = '' unless defined $human_date;
    $collection_range = '' unless defined $collection_range;

    # Preserve the original date to print to the user in case we can't
    # figure it out
    my $orig_date = $human_date;

    my $result = _match_date_format(
        _clean_human_date($human_date), $collection_range);

    # The check runs for EVERY rule.  The original skipped it for the
    # "1950-08-17 - ??" style ranges, so an unreadable right-hand side was
    # silently written out as ".../CRAP".
    if ($result =~ /CRAP/) {
        $result = _prompt_for_date($orig_date, $collection_range);
    }

    $norm_date = $result;

    if ($verbose_mode eq 'y') {
        _report_verbose($orig_date, $result);
    }
    return $result;
}

#
# _clean_human_date SUBPROCEDURE
# Strips the decoration that would confuse the format rules and returns
# the cleaned text.
#
sub _clean_human_date {
    my ($text) = @_;

    # strip off ALL brackets
    $text =~ s/[\[\]]//g;

    # Replace [ongoing] with 9999.  The separator in front of it is kept,
    # so "1950-[ongoing]" becomes "1950-9999" rather than "19509999".
    $text =~ s/(\W+)ongoing\s*$/${1}9999/;

    # strip off the possible initial Date: as in "Date: ca. 1935"
    if ($text =~ /^\s*[Dd][Aa][Tt][Ee]:(.*)$/) {
        $text = $1;
    }

    # remove trailing periods and commas (and the whitespace around them)
    $text =~ s/[\s.,]+\z//;

    # remove leading periods and commas
    $text =~ s/\A\s*[.,]+//;

    # Trash question marks that follow a year: "1920?-1950?" -> "1920-1950"
    $text =~ s/(\d{4})\?/$1/g;

    return $text;
}

#
# _match_date_format SUBPROCEDURE
# The super-long switch statement: tries each known format in order and
# returns the normalized form.  The result contains the marker "CRAP" when
# the date (or one half of a range) could not be understood.
#
sub _match_date_format {
    my ($text, $collection_range) = @_;

    # Most of the date formats are now taken care of in long_date().
    # Ranges starting with these two date formats don't play well with
    # others, so they've been separated.

    # 1950-08-17
    if ($text =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf '%04d-%02d-%02d', $1, $2, $3;
    }

    # 1950-08
    if ($text =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf '%04d-%02d', $1, $2;
    }

    # 1950-08-17 - ??
    if ($text =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/) {
        my ($year, $month, $day, $rest) = ($1, $2, $3, $4);
        return sprintf('%04d-%02d-%02d', $year, $month, $day)
            . '/' . long_date($rest);
    }

    # anything listed immediately below cannot be used as part of a date range

    # 1950-08 - ??
    if ($text =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[-\/](.*)$/) {
        my ($year, $month, $rest) = ($1, $2, $3);
        return sprintf('%04d-%02d', $year, $month) . '/' . long_date($rest);
    }

    # 19th Century (also 1st, 2nd, 3rd, 21st ...)
    if ($text =~ /(\d{1,2})(?:[Tt][Hh]|[Rr][Dd]|[Nn][Dd]|[Ss][Tt])\s*[Cc]ent/) {
        return sprintf '%04d/%04d', $1 * 100 - 99, $1 * 100;
    }

    # 1920
    if ($text =~ /^\s*(\d{4})\s*$/) {
        return $1;
    }

    # 1920-1952
    # 1920's- 1952
    if ($text =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})\s*$/) {
        return $1 . '/' . $2;
    }

    # 1920s-1950's
    # 1920 - 1950's
    # translate into 1920/1959
    if ($text =~ /^\s*(\d{3,4})'{0,1}s{0,1}\s*[-\/]\s*(\d{3,4})'{0,1}s\s*$/) {
        return $1 . '/' . ($2 + 9);
    }

    # ca. 1945  =>  five years either side
    if ($text =~ /^\s*[Cc][^\d]*(\d{4})/) {
        return ($1 - 5) . '/' . ($1 + 5);
    }

    # 1950s, 1950's
    if ($text =~ /^\s*(\d{4})'{0,1}s\s*$/) {
        return $1 . '/' . ($1 + 9);
    }

    # "January 1950s" is deliberately NOT handled here; it is left to the
    # user to normalize by hand.

    # undated
    if ($text =~ /^\s*[Uu][Nn][Dd][Aa][Tt][Ee][Dd]\s*$/) {
        return $collection_range;
    }

    # n.d.  (the final period is optional because trailing periods have
    # already been stripped by the time this rule runs)
    if ($text =~ /^\s*[Nn]\.\s*[Dd]\.?\s*$/) {
        return $collection_range;
    }

    # empty field
    if ($text =~ /^\s*$/) {
        return $collection_range;
    }

    # Check not only for January 17, 1950, etc here, but also ranges, like
    #   January 1950 - January 17, 1951
    #   or 12-01-1972 - 12-12-1986
    #   but NOT 12-01-72 - 12-12-1986 (no two-digit year on the left side)
    if ($text =~ /\d{4}\s*[-\/]/) {
        return _normalize_range($text);
    }

    # January - March 1979
    if ($text =~ /^([^a-zA-Z]*[a-zA-Z]{3,9}.*)([-\/].*)([a-zA-Z]{3,9}.*)(\d{4})(.*)$/) {
        # rebuild date string into January 1979 - March 1979
        return _normalize_range($1 . ' ' . ' ' . $4 . $2 . $3 . $4 . $5);
    }

    # January 17 - 28 1979
    if ($text =~ /^([^a-zA-Z]*)([a-zA-Z]{3,9})(.*)([-\/]\s*)(\d{1,2}.*)(\d{4})(.*)$/) {
        # rebuild date string into January 17 1979 - January 28 1979
        return _normalize_range(
            $1 . $2 . $3 . ' ' . $6 . ' ' . $4 . ' ' . $2 . ' ' . $5 . $6 . $7);
    }

    # it's just a single date unit (not a date range)
    return long_date($text);
}

#
# _normalize_range SUBPROCEDURE
# Splits $text at the first four-digit year that is followed by a dash or
# a slash, normalizes each half with long_date(), and joins the halves
# with a slash.  Returns "CRAP" when no such split point exists.
#
sub _normalize_range {
    my ($text) = @_;

    # $before + $year is the left half; $after (up to the end of the line)
    # is the right half.
    my ($before, $year, $after) =
        $text =~ /\A(.*?)(\d{4})\s*[-\/]([^\n]*)/s;
    return 'CRAP' unless defined $year;

    return long_date($before . $year) . '/' . long_date($after);
}

#
# _prompt_for_date SUBPROCEDURE
# Shows the unrecognized date to the user and returns what they type.
# An empty answer (or end-of-input) accepts the collection's date range.
#
sub _prompt_for_date {
    my ($orig_date, $collection_range) = @_;

    print STDOUT "\n-------------------------\n";
    print STDOUT "unrecognized date format:\n\n";
    print STDOUT "\"" . $orig_date . "\"\n\n";

    if (length($collection_range) > 0) {
        print STDOUT "The date range for this collection is: \""
            . $collection_range . "\"\n";
    }
    else {
        print STDOUT "The date range for this collection has not been determined\n";
    }

    print STDOUT "\nPlease input the ISO 8601 normalized form of this date,\n";
    print STDOUT " or just hit ENTER to accept the collection's date range\n";
    print STDOUT " as the normalized date:\n> ";

    my $answer = <STDIN>;
    $answer = '' unless defined $answer;
    chomp $answer;
    if (length($answer) < 1) {
        $answer = $collection_range;
    }

    print STDOUT "-------------------------\n\n";
    return $answer;
}

#
# _report_verbose SUBPROCEDURE
# Prints one original/normalized pair and pauses after every fifth report
# so the user can read the output.
#
sub _report_verbose {
    my ($orig_date, $normalized) = @_;

    print STDOUT "\nORIGINAL DATE: $orig_date\n";
    print STDOUT "NORMALIZED DATE: $normalized\n";

    $verbose_counter++;
    if ($verbose_counter > 4) {
        print STDOUT "\n\tPress \"Enter\" to continue> ";
        my $ignored = <STDIN>;
        $verbose_counter = 0;
    }
    return;
}

#
# date_norm SUBPROCEDURE
# Twig handler for every unitdate: calls date_norm2 to normalize the
# element's text and stores the entity-encoded result in normal="".
# Elements that already carry a normal attribute are left alone unless the
# value is the '##' placeholder or the user asked for replacement.
#
sub date_norm {
    my ($t, $un_date) = @_;

    # Added support for '##' normal atts for Michele Combs
    if (   !$un_date->att('normal')
        || $un_date->att('normal') eq '##'
        || $replace_norms == 1)
    {
        my $normalized = date_norm2($un_date->text(), $daterange);
        $norm_date = HTML::Entities::encode($normalized);
        $un_date->set_att('normal', $norm_date);
        $date_count++;
    }
    return;
}

#
# long_date SUBPROCEDURE
# Called by date_norm2 to handle most date formats, particularly
# the ones used in ranges.
#
# Parameter: one human-readable date (never a range).
# Returns YYYY-MM-DD or YYYY-MM, or the string "CRAP" when the format is
# not recognized.  Numeric month and day values are zero-padded.
#
sub long_date {
    my ($ld_human_date) = @_;
    $ld_human_date = '' unless defined $ld_human_date;

    # Convert "1950 August 17" to "17 August 1950"
    if ($ld_human_date =~ /^\s*(\d{4})\s*([A-Za-z]+)[^\d]+(\d{1,2})\s*$/) {
        $ld_human_date = "$3 $2 $1";
    }

    # 1950-08-17
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf '%04d-%02d-%02d', $1, $2, $3;
    }

    # 1950-08
    if ($ld_human_date =~ /^\s*(\d{4})\s*[^\d]\s*(\d{1,2})\s*$/) {
        return sprintf '%04d-%02d', $1, $2;
    }

    # 8-17-1950 or 8/17/1950 (month first)
    if ($ld_human_date =~ /^\s*(\d{1,2})[^\d]+(\d{1,2})[^\d]+(\d{4})(.*)\s*$/) {
        return sprintf '%04d-%02d-%02d', $3, $1, $2;
    }

    # January 17, 1970
    if ($ld_human_date =~ /^\s*($MONTH_RE)[^\d]*(\d{1,2})[^\d]+(\d{4})\s*$/) {
        return sprintf '%04d-%02d-%02d', $3, $MONTH_NUMBER_OF{ lc $1 }, $2;
    }

    # 17 January, 1970
    if ($ld_human_date =~ /^\s*(\d{1,2})\s*($MONTH_RE)[^\d]+(\d{4})\s*$/) {
        return sprintf '%04d-%02d-%02d', $3, $MONTH_NUMBER_OF{ lc $2 }, $1;
    }

    # January 1970
    if ($ld_human_date =~ /^\s*($MONTH_RE)[^\d]*(\d{4})\s*$/) {
        return sprintf '%04d-%02d', $2, $MONTH_NUMBER_OF{ lc $1 };
    }

    return 'CRAP';
}

#
# THE END OF THE SUBPROCEDURES
#

# Run the interactive driver only when executed as a script, so the
# subprocedures above can be loaded (e.g. by a test) without prompting.
main() unless caller;

1;