Perl

This is a script used to upload and parse a data file

#!/usr/bin/perl -w

# This Script was developed for www.miniplantkingdom.com.
# Their site is run by Perl CGI scripts reading a flat file database
# to fill out the various web pages on the fly.

# This file uploads a database created from a FileMaker File and output as comma delimited
# text file to be processed, then it checks the file for properly filled out records, usually
# dropping the record if it is not correct.
# After error checking, the corrected text file is saved for CGI use by the site,
# and serious errors are reported back to the user, so they can decide if they want to try
# uploading again after fixing the data in the FileMaker File.
# after uploading, a web page is made for each plant record that is uploaded
# and an index is made with a listing linking to all the plant web pages.
# the individual files are made for spiders and web crawlers to identify for search engines

use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use lib '../lib';

use sitelib qw($dataplant $picdir $plantgroup @datadefplant @linesplant %groupdescriptions
 fmchars initSearchData repl setPlantRec setDescRec );
$dataplant = "../$dataplant"; # fix dataplant since we are in a different place

umask(022); # create files world readable, but not writeable.

my $query = new CGI;

print $query->header;
print $query->start_html(-title=>'Upload Databases',
 -text=>'#000000',
 -bgcolor=>'#FFFFFF',
 -LINK=>'#FF6666',
 -VLINK=>'#FF6666',
 -ALINK=>'#CCCCCC');

print <<ETX;
<blockquote>
<H2>Upload database export files for MPK...</H2>
<HR>
ETX

print $query->start_multipart_form(-method=>'POST',
 -encoding=>'multipart/form-data');

print "Plant database file (plants.txt) \n";
print $query->filefield(-name=>"plantDB"), " \n";

print $query->submit(-name=>"submit", value=>"Upload Files"), "\n";

print $query->endform;

print $query->end_html, "\n";

print " <HR> \n";

if($query->request_method() eq 'POST') {
 no strict 'refs';
 my $fh; # input data file handle
 my $lineerrs = 0; # improper number of fields
 my $missingerrs = 0; # required field missing data
 my $n=0; my $x=0; # counter variables
 $fh = $query->param("plantDB");
 if(length($fh)) {
 my($savsep) = $/;
 my(@filelines); # input file divided into lines
 my $lines = 0; # counter for lines of input file
my $outputlines = 0; # counter for lines we output
my $plantlines = 0; # counter for lines in lineplant array
my %cnmap; # hash of what field number is each property
 my $plantid; # plantid of this line of input
 my $oldplantid; # plantid of previous line of input
 my @splitid; # array of plantid on '_'
 my $categorycode = " "; # holds the category code to check for category data
 my $deadcategory = " ";

 @cnmap{@datadefplant} = 0 .. int(@datadefplant);

 open(NEWF, ">$dataplant.new") || die "can't open $dataplant.new: $!";
 undef $/;
 $_ = <$fh> # read in the entire file
 $/ = $savsep;
 if(tr/\n/\n/ < 2) { # no newlines so it must be mac format
 @filelines = split(/\r/);
 } else {
 @filelines = split(/\r?\n/);
 }
 PCLINE: foreach $_ (@filelines) {
 $_ = fmchars($_); # convert/rewrite filemaker output chars to html style
 next if(/^\s*$/);
 my(@splits) = split(/ /); # split on tab
 $n=0;

 $plantid = $splits[$cnmap{'plantid'}];
@splitid = split(/_/, $plantid);

 if ($splitid[0] ne $deadcategory) { # deadcategory records if a category has no descriptor. # if that is the case dont load any of them.
 # check for leading and trailing blanks in all fields and delete them
 foreach my $data (@splits){
 #leading blanks
 if ($data =~ /^ (.*)/){
 while ($data =~ /(.*) $/){
 $data = $1;
 }
 }
 # trailing blanks
 if ($data =~ /(.*) $/){
 while ($data =~ /(.*) $/){
 $data = $1;
 }
 }
 $n++
 }


 #check format of plantid
 if(int(@splitid) != 2){
 print " Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the plantid set up wrong. ";
 print " it should be in the format of XX_000.";
 $lineerrs++;
 $lines++;
 next PCLINE;
 }
 #check format of plantid
 if(!($plantid =~ /^[\@A-Z][\@A-Z]_[0-9][0-9][0-9]$/)){
 print " Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the plantid set up wrong. ";
 print " it should be in the format of XX_000.";
 $lineerrs++;
 $lines++;
 next PCLINE;
 }

 #check for duplicate plantid
 if(!($plantid =~ /^[\@A-Z][\@A-Z]_[0-9][0-9][0-9]$/)){
 print " Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the same plantid as the one before it. ";
 print " I am skipping it. ";
 $lineerrs++;
 $lines++;
 next PCLINE;
 }

 # check for the proper number of fields in the line. Skip if if the wrong number
 if(int(@splits) != int(@datadefplant)){
 $n = int(@splits);
 $x = int(@datadefplant);
 print " Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has wrong number of fields ";
 print " it only had $n fields instead of $x, so I ignored this record ";
 $lineerrs++;
 $lines++;
 next PCLINE;
 }

 # check for inclusion of description lines for each new category
 if ($categorycode ne $splitid[0]){
 if (!($splitid[1] eq '000')){
 $deadcategory = $splitid[0];
 print " There is no description (".$splitid[0]."_000) for category $splits[$cnmap{'aacategory'}] (line $lines) ";
 print "So I am skipping this entire category!!! ";
 }
 $categorycode = $splitid[0];
 }

 # check for contents in required content fields
 if (!($splits[$cnmap{plantid}] =~ /000/)){
 foreach my $name ( qw(available aacategory aabotanicalname ever zone)) { # sizes
 if(!length($splits[$cnmap{$name}])) {
 print " Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": contains empty $name field -- ignored record \n";
 $missingerrs++;
 $lines++;
 next PCLINE;
 }
 }

 # check for images data and verify that the image is where it belongs. if a problem, put in default image.
 foreach my $image (qw(imagea imageb imagec imaged)){
 if(!length($splits[$cnmap{$image}])) {
 print "Plant photo $splits[$cnmap{$image}] for $splits[$cnmap{aabotanicalname}] (line $lines) not provided \n";
 $splits[$cnmap{$image}] = "default".$image.".jpg";
 } elsif(! -f "$picdir/$splits[$cnmap{$image}]") {
 $splits[$cnmap{$image}] = "default".$image.".jpg";
 }
 }

 # This is where expansions go
 if (uc($splits[$cnmap{ever}]) eq "Y"){
 $splits[$cnmap{ever}] = "Evergreen";
 } else {
 $splits[$cnmap{ever}] = "Deciduous";
 }

 $splits[$cnmap{sizes}] =~ s/(Sm|Sm\.)/Small Pot /ig;
 $splits[$cnmap{sizes}] =~ s/(Md|Md\.)/Medium Pot /ig;
 $splits[$cnmap{sizes}] =~ s/(Lg|Lg\.)/Large Pot /ig;
 $splits[$cnmap{sizes}] =~ s/(Jb|Jb\.|\.Jumbo|Jumbo\.)/Jumbo Pot /ig;
 $splits[$cnmap{sizes}] =~ s/(Other\.|\.Other)/Other Sizes /ig;
 $splits[$cnmap{water_LMH}] =~ s/[Ll]/Low /ig;
 $splits[$cnmap{water_LMH}] =~ s/[Mm]/Moderate /ig;
 $splits[$cnmap{water_LMH}] =~ s/[Hh]/Heavy /ig;
 $splits[$cnmap{gardentype}] =~ s/\./ /g;
 $splits[$cnmap{optimal_light}] =~ s/\./ /g;

 }

 $lines++;
 $outputlines++;
 if ($splitid[0] ne $deadcategory) { # deadcategory records if a category has no descriptor. # if that is the case dont load any of them.

 my $result = join(' ', @splits);
 print NEWF "$result\n";
 }
 } else { #deadcategory
 $lines++;
 }
 }

 close(NEWF);

 # complain about errors
 if(($lineerrs + $missingerrs) > 10) { # too many errors so don't save
 print " upload of $fh contains $lineerrs lines with incorrect number of fields ";
 print "and $missingerrs lines with missing category or botanical name. ";
 print "$dataplant not updated ";
 # comment out the exit if you want to save anyway
 exit(0);
 } elsif (($lineerrs + $missingerrs) > 0) { # only a few errors so allow saving
 print " upload of $fh contains $lineerrs lines with incorrect number of fields ";
 print "and $missingerrs lines with missing category or botanical name. ";
 print "ignored those lines and updated $dataplant anyway ";
 }
 if($lines < 3) { # too few lines so there is a problem -- don't save
 print " upload db contains $lines lines ";
 print "We expect a longer file ";
 print "$dataplant not updated ";
 exit(0);
 }
 print " $dataplant uploaded $outputlines records of $lines records ";
 unlink("$dataplant.old");
 link("$dataplant", "$dataplant.old") || die "backup $dataplant: $!"; # rename current file to old
 rename("$dataplant.new", "$dataplant") || die "rename $dataplant.new: $!"; # rename new file to current
 print " updated $dataplant ";
 print "</blockquote>";

 &initSearchData("_"); # There will always be "_" in the plantid so this gets us all the plants

 # this section does the all plant index page
 my $htmlfile = "../plant_toc.macro";
 my $hdr; # initial section of html file
 my $theloophdr; # the loop header

my $theloop; # the repeating part of the loop
 my $trlr; # closing section of html file

 $query = new CGI;

 open(HTML, $htmlfile) || die "can't open $htmlfile for parsing";
 open(NEWFL, ">../plants/plantindex.html") || die "can't open plantindex.html for input";

 # read the entire html file

 my $x = $/;
 undef $/;
 my $rawhtml = <HTML>
 $/ = $x;

 # Divide the html into three sections, header, trailer and loop

 $hdr = $rawhtml;
 $hdr =~ s/\<\!-- LOOP -->.*$//si; # chop off everything starting with (.*)\<\!-- \/LOOPHEADER -->.*$/$1/si; # read loopheader

$theloop = substr($rawhtml, length($hdr) + length($theloophdr)); # chop off header and loopheader (includes loop start tag)
$theloop =~ s/^.*\<\!-- \/LOOPHEADER -->(.*)\<\!-- \/LOOP -->.*$/$1/si; # read from end of loopheader to end of loop

$theloop =~ s/oneplant\.cgi\?plant=\$\{plantid\}/\$\{plantid\}\.html/ig;
$theloop =~ s(src=")(src="../)ig;
$theloop =~ s(src=')(src='../)ig;

$trlr = substr($rawhtml, length($hdr) + length($theloophdr) + length($theloop)); # chop off loop and hdr
$trlr =~ s/^.*\<\!-- \/LOOP -->(.*$)/$1/si; # read from end of loop to end of file

# Done with the "HTML compilation" phase

&setPlantRec(0);

# Adjust all the hrefs for the position of the files (go up a level to find everything)
$hdr =~ s(nav_top.macro)(../nav_top.macro)ig;
$hdr =~ s(nav_left.macro)(../nav_left.macro)ig;

$hdr = repl($hdr);
$hdr =~ s(href=")(href="../)ig;
$hdr =~ s(src=")(src="../)ig;
$hdr =~ s(href=')(href='../)ig;
$hdr =~ s(src=')(src='../)ig;
$hdr =~ s('navs/navbar)('../navs/navbar)ig;
$hdr =~ s{\.\./javascript}{javascript}ig;

$hdr =~ s{'index.html'}{'../index.html'}ig;
$hdr =~ s{'plant_groups.cgi'}{'../plant_groups.cgi'}ig;
$hdr =~ s{'search.cgi}{'../search.cgi}ig;
$hdr =~ s{'order.cgi'}{'../order.cgi'}ig;
$hdr =~ s{'about.cgi'}{'../about.cgi'}ig;
$hdr =~ s{'contact.cgi'}{'../contact.cgi'}ig;

print NEWFL $hdr;

my $n=0;
my %hash;
my $category = " ";
my @idparts;
foreach my $hold (@linesplant) {
%hash = %{$hold};
$plantid = $hash{plantid};
@idparts = split("_", $plantid);
if (!($idparts[0] eq $category)) {
$category = $idparts[0];
&setDescRec($category);
print NEWFL repl($theloophdr, "toc");
}
&setPlantRec($n);
print NEWFL repl($theloop, "toc");
$n++;
}

print NEWFL repl($trlr);

close(HTML);
close(NEWFL);

# this section writes the individual plant files

$htmlfile = "../oneplant.macro";

$query = new CGI;

open(HTML, $htmlfile) || die "can't open $htmlfile for parsing";

# read the entire html file

$x = $/;
undef $/;
$rawhtml = <HTML>
$/ = $x;

# Done with the "HTML compilation" phase
my $thishtml;
my $outputhtml;
$n=0;
foreach my $hold (@linesplant) {
 %hash = %{$hold};
 open(NEWF, ">../plants/$hash{plantid}.html") || die " cant open $hash{plantid}.html for input.";
 &setPlantRec($n);

 $thishtml = $rawhtml;
 $thishtml =~ s(nav_top.macro)(../nav_top.macro)ig;
 $thishtml =~ s(nav_left.macro)(../nav_left.macro)ig;

 $outputhtml = repl($thishtml);

 #adjust nav to call up a level
 $outputhtml =~ s(href=")(href="../)ig;
 $outputhtml =~ s(src=")(src="../)ig;
 $outputhtml =~ s(href=')(href='../)ig;
 $outputhtml =~ s(src=')(src='../)ig;
 $outputhtml =~ s('navs/navbar)('../navs/navbar)ig;
 $outputhtml =~ s{../javascript}{javascript}ig;

 $outputhtml =~ s{'index.html'}{'../index.html'}ig;
 $outputhtml =~ s{'plant_groups.cgi'}{'../plant_groups.cgi'}ig;
 $outputhtml =~ s{'search.cgi}{'../search.cgi}ig;
 $outputhtml =~ s{'order.cgi'}{'../order.cgi'}ig;
 $outputhtml =~ s{'about.cgi'}{'../about.cgi'}ig;
 $outputhtml =~ s{'contact.cgi'}{'../contact.cgi'}ig;

 print NEWF $outputhtml;
 close(NEWF);
 $n++;
 }
 }
}