Programming : Perl

Perl

This is a script used to upload and parse a data file



#!/usr/bin/perl -w


# This Script was developed for www.miniplantkingdom.com.
# Their site is run by Perl CGI scripts reading a flat file database
# to fill out the various web pages on the fly.

# This file uploads a database created from a FileMaker File and output as comma delimited
# text file to be processed, then it checks the file for properly filled out records, usually
# dropping the record if it is not correct.
# After error checking, the corrected text file is saved for CGI use by the site,
# and serious errors are reported back to the user, so they can decide if they want to try
# uploading again after fixing the data in the FileMaker File.
# after uploading, a web page is made for each plant record that is uploaded
# and an index is made with a listing linking to all the plant web pages.
# the individual files are made for spiders and web crawlers to identify for search engines

use strict;
use CGI;
use CGI::Carp qw(fatalsToBrowser);
use lib '../lib';

use sitelib qw($dataplant $picdir $plantgroup @datadefplant @linesplant %groupdescriptions
                                                fmchars initSearchData repl setPlantRec setDescRec );
$dataplant = "../$dataplant";             # fix dataplant since we are in a different place

umask(022);                                     # create files world readable, but not writeable.

my $query = new CGI;

print $query->header;
print $query->start_html(-title=>'Upload Databases',
                                    -text=>'#000000',
                                    -bgcolor=>'#FFFFFF',
                                    -LINK=>'#FF6666',
                                    -VLINK=>'#FF6666',
                                    -ALINK=>'#CCCCCC');

print <<ETX;
<blockquote>
<H2>Upload database export files for MPK...</H2>
<HR>
ETX

print $query->start_multipart_form(-method=>'POST',
                                                -encoding=>'multipart/form-data');

print "Plant database file (plants.txt)<BR>\n";
print $query->filefield(-name=>"plantDB"), "<BR><BR>\n";

print $query->submit(-name=>"submit", value=>"Upload Files"), "\n";

print $query->endform;

print $query->end_html, "\n";

print "<BR><HR><BR>\n";

if($query->request_method() eq 'POST') {
     no strict 'refs';
      my $fh;                                              # input data file handle
       my $lineerrs = 0;                                # improper number of fields
        my $missingerrs = 0;                              # required field missing data
         my $n=0; my $x=0;                                  # counter variables
          $fh = $query->param("plantDB");
           if(length($fh)) {
                        my($savsep) = $/;
                        my(@filelines);                                # input file divided into lines
                        my $lines = 0;                                # counter for lines of input file
my $outputlines = 0; # counter for lines we output
my $plantlines = 0; # counter for lines in lineplant array
  my %cnmap;   # hash of what field number is each property
   my $plantid;    # plantid of this line of input
    my $oldplantid;     # plantid of previous line of input
     my @splitid;      # array of plantid on '_'
      my $categorycode = " "; # holds the category code to check for category data
       my $deadcategory = " ";

         @cnmap{@datadefplant} = 0 .. int(@datadefplant);

           open(NEWF, ">$dataplant.new") || die "can't open $dataplant.new: $!";
            undef $/;
            $_ = <$fh>                        # read in the entire file
              $/ = $savsep;
               if(tr/\n/\n/ < 2) {               # no newlines so it must be mac format
                            @filelines = split(/\r/);
                 } else {
                              @filelines = split(/\r?\n/);
                   }
                    PCLINE: foreach $_ (@filelines) {
                                 $_ = fmchars($_);                # convert/rewrite filemaker output chars to html style
                                  next if(/^\s*$/);
                                   my(@splits) = split(/            /); # split on tab
                                    $n=0;
                                   
                                    $plantid = $splits[$cnmap{'plantid'}];
  @splitid = split(/_/, $plantid);
  
    if ($splitid[0] ne $deadcategory) { # deadcategory records if a category has no descriptor.          # if that is the case dont load any of them.
                 # check for leading and trailing blanks in all fields and delete them
                  foreach my $data (@splits){
                               #leading blanks
                                if ($data =~ /^ (.*)/){
                                             while ($data =~ /(.*) $/){
                                              $data = $1;
                                               }
                                      }
                                    # trailing blanks
                                      if ($data =~ /(.*) $/){
                                                   while ($data =~ /(.*) $/){
                                                    $data = $1;
                                                     }
                                          }
                                           $n++
                                }
                                
                                 
                                   #check format of plantid
                                    if(int(@splitid) != 2){
                                                print "<br> Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the plantid set up wrong. <BR>";
                                                  print " it should be in the format of XX_000.";
                                                   $lineerrs++;
                                                    $lines++;
                                                     next PCLINE;
                                          }                          
                                           #check format of plantid
                                            if(!($plantid =~ /^[\@A-Z][\@A-Z]_[0-9][0-9][0-9]$/)){
                                                         print "<br> Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the plantid set up wrong. <BR>";
                                                          print " it should be in the format of XX_000.";
                                                           $lineerrs++;
                                                            $lines++;
                                       next PCLINE;
                            }
                            
                              #check for duplicate plantid
                               if(!($plantid =~ /^[\@A-Z][\@A-Z]_[0-9][0-9][0-9]$/)){
                                            print "<br> Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has the same plantid as the one before it. <BR>";
                                             print " I am skipping it.<br><br>";
                                              $lineerrs++;
                                               $lines++;
                                                next PCLINE;
                                    }
                                     
                                       # check for the proper number of fields in the line. Skip if if the wrong number
                                        if(int(@splits) != int(@datadefplant)){
                                                     $n = int(@splits);
                                                      $x = int(@datadefplant);
                                                       print "<br> Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": has wrong number of fields <BR>";
                                                        print " it only had $n fields instead of $x, so I ignored this record<BR><br>";
                                                         $lineerrs++;
                                                          $lines++;
                                                           next PCLINE;
                                      }
                                      
                                        # check for inclusion of description lines for each new category
                                         if ($categorycode ne $splitid[0]){
                                                      if (!($splitid[1] eq '000')){
                                                                   $deadcategory = $splitid[0];
                                                                    print "<br>There is no description (".$splitid[0]."_000) for category $splits[$cnmap{'aacategory'}] (line $lines) <br>";
                                                                     print "So I am skipping this entire category!!!<br><br>";
                                                          }
                                  $categorycode = $splitid[0];
                       }
                       
                        # check for contents in required content fields
                          if (!($splits[$cnmap{plantid}] =~ /000/)){
                           foreach my $name ( qw(available aacategory aabotanicalname ever zone)) { # sizes
                                        if(!length($splits[$cnmap{$name}])) {
                                                     print "<br>Line number $lines \"$splits[$cnmap{'aabotanicalname'}]\": contains empty $name field -- ignored record<BR><BR>\n";
                                                      $missingerrs++;
                                                       $lines++;
                                                        next PCLINE;
                                             }
                                  }
                                              
                                                # check for images data and verify that the image is where it belongs. if a problem, put in default image.
                                                foreach my $image (qw(imagea imageb imagec imaged)){
                                                            if(!length($splits[$cnmap{$image}])) {
                                                                        print "Plant photo $splits[$cnmap{$image}] for $splits[$cnmap{aabotanicalname}] (line $lines) not provided<BR>\n";
                                                                        $splits[$cnmap{$image}] = "default".$image.".jpg";
                                                            } elsif(! -f "$picdir/$splits[$cnmap{$image}]") {
                                                                       $splits[$cnmap{$image}] = "default".$image.".jpg";
                                                            }
                                                }
                                               
                                                # This is where expansions go
                                                if (uc($splits[$cnmap{ever}]) eq "Y"){
                                                            $splits[$cnmap{ever}] = "Evergreen";
                                                } else {
                                                            $splits[$cnmap{ever}] = "Deciduous";
                                                }
                                               
                                                $splits[$cnmap{sizes}] =~ s/(Sm|Sm\.)/Small Pot<br>/ig;
                                                $splits[$cnmap{sizes}] =~ s/(Md|Md\.)/Medium Pot<br>/ig;
                                                $splits[$cnmap{sizes}] =~ s/(Lg|Lg\.)/Large Pot<br>/ig;
                                                $splits[$cnmap{sizes}] =~ s/(Jb|Jb\.|\.Jumbo|Jumbo\.)/Jumbo Pot<br>/ig;
                                                $splits[$cnmap{sizes}] =~ s/(Other\.|\.Other)/Other Sizes<br>/ig;
                                                $splits[$cnmap{water_LMH}] =~ s/[Ll]/Low<br>/ig;
                                                $splits[$cnmap{water_LMH}] =~ s/[Mm]/Moderate<br>/ig;
                                                $splits[$cnmap{water_LMH}] =~ s/[Hh]/Heavy<br>/ig;
                                                $splits[$cnmap{gardentype}] =~ s/\./<br>/g;
                                                $splits[$cnmap{optimal_light}] =~ s/\./<br>/g;
                                               
                                                }
                                               
                                                $lines++;
                                                $outputlines++;
                                                if ($splitid[0] ne $deadcategory) { # deadcategory records if a category has no descriptor.                                                                                                                                                 # if that is the case dont load any of them.
                                               
                                                my $result = join('             ', @splits);
                                                print NEWF "$result\n";
                                                }
                                    } else { #deadcategory
                                    $lines++;
                                    }
                        }
                       
                        close(NEWF);
                       
                        # complain about errors
                        if(($lineerrs + $missingerrs) > 10) { # too many errors so don't save
                                    print "<BR><BR>upload of $fh contains $lineerrs lines with incorrect number of fields<BR>";
                                    print "and $missingerrs lines with missing category or botanical name.<br>";
                                    print "$dataplant not updated<BR><BR><BR>";
                                    # comment out the exit if you want to save anyway
                                    exit(0);
                        } elsif (($lineerrs + $missingerrs) > 0) { # only a few errors so allow saving
                                    print "<BR><BR>upload of $fh contains $lineerrs lines with incorrect number of fields<BR>";
                                    print "and $missingerrs lines with missing category or botanical name.<br>";
                                    print "ignored those lines and updated $dataplant anyway<BR><BR><BR>";
                        }
                        if($lines < 3) { # too few lines so there is a problem -- don't save
                                    print "<BR><BR>upload db contains $lines lines<BR>";
                                    print "We expect a longer file<BR>";
                                    print "$dataplant not updated<BR><BR><BR>";
                                    exit(0);
                        }
                        print "<BR>$dataplant uploaded $outputlines records of $lines records<BR>";
                        unlink("$dataplant.old");
                        link("$dataplant", "$dataplant.old") || die "backup $dataplant: $!";                         # rename current file to old
                        rename("$dataplant.new", "$dataplant") || die "rename $dataplant.new: $!";           # rename new file to current
                        print "<BR>updated $dataplant<BR>";
                        print "</blockquote>";

                        &initSearchData("_"); # There will always be "_" in the plantid so this gets us all the plants
                       
                          # this section does the all plant index page
                        my $htmlfile = "../plant_toc.macro";
                        my $hdr;                                    # initial section of html file
                        my $theloophdr;                          # the loop header

                        my $theloop;                               # the repeating part of the loop
                        my $trlr;                                     # closing section of html file

                        $query = new CGI;

                        open(HTML, $htmlfile) || die "can't open $htmlfile for parsing";
                        open(NEWFL, ">../plants/plantindex.html") || die "can't open plantindex.html for input";

                        #             read the entire html file

                        my $x = $/;
                        undef $/;
                        my $rawhtml = <HTML>
                        $/ = $x;

                        #             Divide the html into three sections, header, trailer and loop

                        $hdr = $rawhtml;
                        $hdr =~ s/\<\!-- LOOP -->.*$//si; # chop off everything starting with <!-- LOOP

$theloophdr = substr($rawhtml, length($hdr)); # chop off header
$theloophdr =~ s/^.*\<\!-- LOOPHEADER -->(.*)\<\!-- \/LOOPHEADER -->.*$/$1/si; # read loopheader

$theloop = substr($rawhtml, length($hdr) + length($theloophdr)); # chop off header and loopheader (includes loop start tag)
$theloop =~ s/^.*\<\!-- \/LOOPHEADER -->(.*)\<\!-- \/LOOP -->.*$/$1/si; # read from end of loopheader to end of loop

$theloop =~ s/oneplant\.cgi\?plant=\$\{plantid\}/\$\{plantid\}\.html/ig;
$theloop =~ s(src=")(src="../)ig;
$theloop =~ s(src=')(src='../)ig;

$trlr = substr($rawhtml, length($hdr) + length($theloophdr) + length($theloop)); # chop off loop and hdr
$trlr =~ s/^.*\<\!-- \/LOOP -->(.*$)/$1/si; # read from end of loop to end of file

# Done with the "HTML compilation" phase

&setPlantRec(0);

# Adjust all the hrefs for the position of the files (go up a level to find everything)
$hdr =~ s(nav_top.macro)(../nav_top.macro)ig;
$hdr =~ s(nav_left.macro)(../nav_left.macro)ig;

$hdr = repl($hdr);
$hdr =~ s(href=")(href="../)ig;
$hdr =~ s(src=")(src="../)ig;
$hdr =~ s(href=')(href='../)ig;
$hdr =~ s(src=')(src='../)ig;
$hdr =~ s('navs/navbar)('../navs/navbar)ig;
$hdr =~ s{\.\./javascript}{javascript}ig;

$hdr =~ s{'index.html'}{'../index.html'}ig;
$hdr =~ s{'plant_groups.cgi'}{'../plant_groups.cgi'}ig;
$hdr =~ s{'search.cgi}{'../search.cgi}ig;
$hdr =~ s{'order.cgi'}{'../order.cgi'}ig;
$hdr =~ s{'about.cgi'}{'../about.cgi'}ig;
$hdr =~ s{'contact.cgi'}{'../contact.cgi'}ig;

print NEWFL $hdr;

my $n=0;
my %hash;
my $category = " ";
my @idparts;
foreach my $hold (@linesplant) {
%hash = %{$hold};
$plantid = $hash{plantid};
@idparts = split("_", $plantid);
if (!($idparts[0] eq $category)) {
$category = $idparts[0];
&setDescRec($category);
print NEWFL repl($theloophdr, "toc");
}
&setPlantRec($n);
print NEWFL repl($theloop, "toc");
$n++;
}

print NEWFL repl($trlr);

close(HTML);
close(NEWFL);


# this section writes the individual plant files

$htmlfile = "../oneplant.macro";

$query = new CGI;

open(HTML, $htmlfile) || die "can't open $htmlfile for parsing";

# read the entire html file

$x = $/;
undef $/;
$rawhtml = <HTML>
$/ = $x;

# Done with the "HTML compilation" phase
my $thishtml;
my $outputhtml;
$n=0;
foreach my $hold (@linesplant) {
    %hash = %{$hold};
     open(NEWF, ">../plants/$hash{plantid}.html") || die " cant open $hash{plantid}.html for input.";
      &setPlantRec($n);
      
        $thishtml = $rawhtml;
         $thishtml =~ s(nav_top.macro)(../nav_top.macro)ig;
          $thishtml =~ s(nav_left.macro)(../nav_left.macro)ig;
          
            $outputhtml = repl($thishtml);
           
              #adjust nav to call up a level
               $outputhtml =~ s(href=")(href="../)ig;
                $outputhtml =~ s(src=")(src="../)ig;
                 $outputhtml =~ s(href=')(href='../)ig;
                  $outputhtml =~ s(src=')(src='../)ig;
                   $outputhtml =~ s('navs/navbar)('../navs/navbar)ig;
                    $outputhtml =~ s{../javascript}{javascript}ig;
        
                      $outputhtml =~ s{'index.html'}{'../index.html'}ig;
                       $outputhtml =~ s{'plant_groups.cgi'}{'../plant_groups.cgi'}ig;
                        $outputhtml =~ s{'search.cgi}{'../search.cgi}ig;
                        $outputhtml =~ s{'order.cgi'}{'../order.cgi'}ig;
                          $outputhtml =~ s{'about.cgi'}{'../about.cgi'}ig;
                           $outputhtml =~ s{'contact.cgi'}{'../contact.cgi'}ig;
               
                             print NEWF $outputhtml;
                              close(NEWF);
                               $n++;
                    }
         }
}