#!/usr/local/bin/perl
#
# Site search page for Toomas Tamm's Photo Website.
#
# Prints the search form; when a search string was submitted, runs glimpse
# against the site index, lists the matching documents (best match first)
# and appends one line per request to the search log.
#
# NOTE(review): the copy of this script that was reviewed had lost all of
# its HTML markup, its heredoc openers and the <RES> read.  Code and visible
# text below follow what survived; the markup is a minimal reconstruction.
# Link targets (site map, Home, Up, e-mail, validate, Glimpse) could not be
# recovered and are emitted as plain text -- restore them from a good copy.

use integer;    # should give some speed-up

{
    # strict/warnings are confined to this block because the cgi-lib.pl
    # routines further down the file rely on package globals.
    use strict;
    use warnings;

    # Request metadata, used for logging only.
    my $FROM_URL   = env_value("HTTP_REFERER");
    my $FROM_HOST  = env_value("REMOTE_ADDR");
    my $FROM_AGENT = env_value("HTTP_USER_AGENT");    # currently unused

    my $this_script_url = "http://www.chem.helsinki.fi/~toomas/misc/local-search.cgi";
    my $logfile         = "/home/toomas/private_html/misc/search_log";

    # also defined in misc/search_data/reindex.sh
    my $lockfile = "/home/toomas/public_html/misc/search_data/indexing_in_progress";

    # Acknowledgements!
    my $GLIMPSE = "/home/toomas/soft/glimpse/glimpse-4.1/bin/glimpse";

    # -H : directory location
    # -i : ignore case
    # -y : assume "yes" response to all questions
    # -U : print URL
    # -c : print count only
    # -k : disallow meta characters (& regexps as a result)
    my @GLIMPSE_FLAGS = ("-H", "/home/toomas/public_html/misc/search_data", "-iyUck");

    # We need to parse input here since the search string is written into
    # the form.  ReadParse fills %input through the glob.
    our %input;
    my $have_input = ReadParse(*input) ? 1 : 0;

    my $query = "";
    if ($have_input && defined $input{"string"}) {
        $query = $input{"string"};

        # Control characters (including the "\0" that separates repeated
        # form fields) would truncate the program argument or split the
        # log line, so flatten them to spaces.
        $query =~ tr/\x00-\x1f\x7f/ /;

        # Remove leading and trailing space.  Some people have it for
        # unknown reasons.
        $query =~ s/^ *//;
        $query =~ s/ *$//;
    }
    my $query_html = html_escape($query);

    # ------------------------------ titles --------------------------------
    # NOTE(review): form method and submit label are assumptions; the field
    # name "string" and the action URL come from the surviving code.
    print PrintHeader();
    print <<"EOF";
<html>
<head>
<title>Search Toomas Tamm's Photo Website</title>
</head>
<body>
<p>Toomas's Photo Website -&gt; Search</p>

<h1>Search Toomas Tamm's Photo Website</h1>

<form action="$this_script_url" method="get">
<p>Search for:
<input type="text" name="string" value="$query_html">
<input type="submit" value="Search">
</p>
</form>

<p>The search engine looks for exact match for the string you typed. Since this is a small site, it is a good idea to search for single words rather than phrases.<br>
You may also check out the site map.</p>

<hr>
EOF

    # Get a timestamp for logging.  localtime reports the year as an offset
    # from 1900, so adding 1900 stays correct after 1999.
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime;
    $year += 1900;
    my $stamp = sprintf("%4d/%02d/%02d %02d:%02d:%02d",
                        $year, $mon + 1, $mday, $hour, $min, $sec);

    my $host = ($FROM_HOST eq "128.214.176.27") ? "(myself)" : $FROM_HOST;

    my $locked    = 0;
    my $n_results = 0;

    if ($have_input) {
        if ($query =~ /^\s*$/) {
            print "<p>No search string specified. Please try again.</p>\n<hr>\n";
        }
        elsif (-f $lockfile) {
            print <<'EOA';
<h2>Sorry!</h2>

<p>The search index is being rebuilt. This normally takes about twenty seconds.<br>
Please try your search again.</p>

<p>If this message persists, please contact Toomas Tamm.</p>

<hr>
EOA
            $locked = 1;
        }
        else {
            print "<h2>Search results for \"", $query_html, "\"</h2>\n";

            # Run glimpse without a shell: the search string is passed as a
            # single argument, so quotes and other shell meta-characters in
            # it cannot alter the command.
            # NOTE(review): a search string starting with "-" is still seen
            # by glimpse as an option, exactly as before -- confirm how
            # glimpse should be told to treat it as a pattern.
            my $res;
            if (!open($res, "-|", $GLIMPSE, @GLIMPSE_FLAGS, $query)) {
                print "Command failed: ",
                      html_escape(join(" ", $GLIMPSE, @GLIMPSE_FLAGS)),
                      "\n<hr>\n</body>\n</html>\n";
                exit;
            }

            # Expected line shape: "<file> <url> <title>: <count>".
            my @hits;
            while (my $line = <$res>) {
                my ($filename, $url, $title, $count) =
                    $line =~ /^([^\s]*) ([^\s]*)(.*): ([0-9]*)$/;

                # Lines that do not parse, or carry no positive count, were
                # never listed; do not count them as results either.
                next unless defined $count && $count ne "" && $count > 0;

                $title =~ s/^ //;    # This trick allows for empty title in the above regexp
                $title = $url if $title eq "";
                $title =~ s/\\:/:/g;      # Glimpse escapes colons
                $title =~ s/<br>/ /gi;    # Some titles contain line breaks

                push @hits, { url => $url, title => $title, count => $count };
            }
            close($res);    # a non-zero glimpse exit status is not an error here

            $n_results = scalar @hits;

            if ($n_results <= 0) {
                print "<p>No documents on this website match the query<br>\n";
                print "Exact match for the string specified is required. You may try using a shorter or different search string.</p>\n<hr>\n";
            }
            else {
                print "<p>$n_results document", ($n_results > 1 ? "s" : ""),
                      " matched the query. Best matches are listed first.</p>\n<ul>\n";

                # Highest count first; equal counts keep glimpse's order.
                my @order = sort {
                    $hits[$b]{count} <=> $hits[$a]{count} || $a <=> $b
                } 0 .. $#hits;

                for my $index (@order) {
                    my $hit = $hits[$index];
                    print "<li><a href=\"", html_escape($hit->{url}), "\">",
                          $hit->{title}, "</a>\n";
                    print " (", $hit->{count}, " match",
                          ($hit->{count} > 1 ? "es" : ""), ")\n";
                }
                print "</ul>\n<hr>\n";
            }
        }

        log_line($logfile,
                 sprintf("%s %-16s '%s': %d match(es)%s",
                         $stamp, $host, $query, $n_results,
                         $locked ? " LOCKED!" : ""));
    }
    else {
        log_line($logfile, sprintf("%s %-16s ----- %s", $stamp, $host, $FROM_URL));
    }

    print <<'EOF';
<p>Search engine made possible by Glimpse.</p>
<p>Home     Up     Site map</p>
<p>Last modified: 11 June 1998<br>
(e-mail)<br>
validate</p>
</body>
</html>
EOF

    system "/home/toomas/private_html/AccessLog/logger local_search > /dev/null 2>&1";
    exit 0;

    # env_value NAME
    # Returns $ENV{NAME}, or the empty string when it is not set.
    sub env_value {
        my ($name) = @_;
        return defined $ENV{$name} ? $ENV{$name} : "";
    }

    # html_escape TEXT
    # Returns TEXT with &, <, > and " replaced by HTML entities, so it can be
    # placed in element content or in a double-quoted attribute.
    sub html_escape {
        my ($text) = @_;
        $text = "" unless defined $text;
        $text =~ s/&/&amp;/g;
        $text =~ s/</&lt;/g;
        $text =~ s/>/&gt;/g;
        $text =~ s/"/&quot;/g;
        return $text;
    }

    # log_line PATH, LINE
    # Appends LINE plus a newline to PATH.  Logging is best effort: a failure
    # is reported on STDERR and never aborts the page.
    sub log_line {
        my ($path, $line) = @_;
        if (open(my $log, ">>", $path)) {
            print {$log} $line, "\n";
            close($log);
        }
        else {
            warn "cannot append to $path: $!\n";
        }
        return;
    }
}

#!/usr/local/bin/perl -- -*- C -*-
# Perl Routines to Manipulate CGI input
# S.E.Brenner@bioc.cam.ac.uk
# $Header: /cys/people/brenner/http/cgi-bin/RCS/cgi-lib.pl,v 1.8 1995/04/07 21:35:29 brenner Exp $
#
# Copyright 1994 Steven E. Brenner
# Unpublished work.
# Permission granted to use and modify this library so long as the
# copyright above is maintained, modifications are documented, and
# credit is given for any use of the library.
#
# Thanks are due to many people for reporting bugs and suggestions
# especially Meng Weng Wong, Maki Watanabe, Bo Frese Rasmussen,
# Andrew Dalke, Mark-Jason Dominus and Dave Dittrich.
# For more information, see:
#   http://www.bio.cam.ac.uk/web/form.html
#   http://www.seas.upenn.edu/~mengwong/forms/
# Minimalist http form and script (http://www.bio.cam.ac.uk/web/minimal.cgi):
#
# require "cgi-lib.pl";
# if (&ReadParse(*input)) {
#   print &PrintHeader, &PrintVariables(%input);
# } else {
#   print &PrintHeader,'
#   Data: ';     (tail of the sample form line above; its markup is missing in this copy)
#}

# Modifications to the original cgi-lib.pl 1.8 routines below:
#   - %XX decoding only accepts two hexadecimal digits and uses an
#     unsigned byte, so "%zz" stays literal and bytes above 0x7F decode
#     without wrapping.
#   - Missing REQUEST_METHOD / CONTENT_LENGTH and pairs without "=" no
#     longer operate on undefined values.

# ReadParse
# Reads in GET or POST data, converts it to unescaped text, and puts
# one key=value in each member of the list "@in"
# Also creates key/value pairs in %in, using '\0' to separate multiple
# selections
# Returns TRUE if there was input, FALSE if there was no input
# UNDEF may be used in the future to indicate some failure.
# Now that cgi scripts can be put in the normal file space, it is useful
# to combine both the form and the script in one place. If no parameters
# are given (i.e., ReadParse returns FALSE), then a form could be output.
# If a variable-glob parameter (e.g., *cgi_input) is passed to ReadParse,
# information is stored there, rather than in $in, @in, and %in.

sub ReadParse {
  local (*in) = @_ if @_;
  local ($i, $key, $val);

  # Read in text
  if (MethGet()) {
    $in = $ENV{'QUERY_STRING'};
  } elsif (defined($ENV{'REQUEST_METHOD'}) && $ENV{'REQUEST_METHOD'} eq "POST") {
    read(STDIN, $in, $ENV{'CONTENT_LENGTH'} || 0);
  }
  $in = '' unless defined($in);

  @in = split(/&/, $in);

  foreach $i (0 .. $#in) {
    # Convert plus's to spaces
    $in[$i] =~ s/\+/ /g;

    # Split into key and value.
    ($key, $val) = split(/=/, $in[$i], 2); # splits on the first =.
    $key = '' unless defined($key);        # empty pair, e.g. "a=1&&b=2"
    $val = '' unless defined($val);        # key without "="

    # Convert %XX from hex numbers to the byte they encode.  Anything
    # that is not two hex digits is left exactly as it was sent.
    $key =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
  }

  return length($in);
}

# PrintHeader
# Returns the magic line which tells WWW that we're an HTML document

sub PrintHeader {
  return "Content-type: text/html\n\n";
}

# MethGet
# Return true if this cgi call was using the GET request, false otherwise

sub MethGet {
  return (defined($ENV{'REQUEST_METHOD'}) && $ENV{'REQUEST_METHOD'} eq "GET");
}

# MyURL
# Returns a URL to the script, built from the SERVER_NAME and SCRIPT_NAME
# environment variables.

sub MyURL {
  return 'http://' . $ENV{'SERVER_NAME'} . $ENV{'SCRIPT_NAME'};
}

# CgiError
# Prints out an error message which contains appropriate headers,
# markup, etcetera.
# Parameters:
#  If no parameters, gives a generic error message
#  Otherwise, the first parameter will be the title and the rest will
#  be given as different paragraphs of the body
#
# The messages are printed as given; callers that build them from
# request data must escape that data themselves.
#
# Modifications to the original cgi-lib.pl 1.8 routines below:
#   - The $* variable is no longer used (it is unsupported in current
#     Perl and was not needed for the s/\n/.../g substitutions).
#   - PrintVariables and PrintVariablesShort HTML-escape keys and values.
#   - NOTE(review): the copy that was reviewed had lost its HTML tags;
#     the markup below is reconstructed from the surviving line-break
#     pattern -- confirm against a pristine cgi-lib.pl 1.8.

sub CgiError {
  my (@msg) = @_;
  my ($i, $name);

  if (!@msg) {
    $name = MyURL();
    @msg = ("Error: script $name encountered fatal error");
  }

  print PrintHeader();
  print "<html><head><title>$msg[0]</title></head>\n";
  print "<body><h1>$msg[0]</h1>\n";
  foreach $i (1 .. $#msg) {
    print "<p>$msg[$i]</p>\n";
  }
  print "</body></html>\n";
}

# _CgiLibEscape
# Private helper: returns its argument with &, <, > and " replaced by
# HTML entities.

sub _CgiLibEscape {
  my ($text) = @_;
  $text = '' unless defined($text);
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}

# PrintVariables
# Nicely formats variables in an associative array passed as a parameter
# And returns the HTML string.
# Values holding several "\0"-separated selections produce one entry per
# selection; line breaks inside a value become <br>.

sub PrintVariables {
  my (%in) = @_;
  my ($key, $out, $output);

  $output = "<dl compact>";
  foreach $key (sort keys(%in)) {
    foreach (split("\0", $in{$key})) {
      ($out = _CgiLibEscape($_)) =~ s/\n/<br>/g;
      $output .= "<dt><b>" . _CgiLibEscape($key) . "</b><dd><i>$out</i><br>";
    }
  }
  $output .= "</dl>";

  return $output;
}

# PrintVariablesShort
# Nicely formats variables in an associative array passed as a parameter
# Using one line per pair (unless value is multiline)
# And returns the HTML string.

sub PrintVariablesShort {
  my (%in) = @_;
  my ($key, $out, $output);

  $output = "";
  foreach $key (sort keys(%in)) {
    foreach (split("\0", $in{$key})) {
      ($out = _CgiLibEscape($_)) =~ s/\n/<br>/g;
      $output .= "<b>" . _CgiLibEscape($key) . "</b> is <i>$out</i><br>";
    }
  }

  return $output;
}

1; #return true