#!/usr/local/bin/perl

use integer; # should give some speed-up

# Details of the incoming request, taken from the CGI environment.
$FROM_URL   = $ENV{"HTTP_REFERER"};
$FROM_HOST  = $ENV{"REMOTE_ADDR"};
$FROM_AGENT = $ENV{"HTTP_USER_AGENT"};

# Fixed locations used by this script.
$this_script_url = "http://www.chem.helsinki.fi/~toomas/misc/local-search.cgi";
$logfile         = "/home/toomas/private_html/misc/search_log";
# The lock file is also defined in misc/search_data/reindex.sh
$lockfile        = "/home/toomas/public_html/misc/search_data/indexing_in_progress";

# The form input has to be parsed this early because the search string is
# written back into the form.  $have_input is 1 when the request carried
# any input and 0 otherwise.
$have_input = &ReadParse(*input) ? 1 : 0;

# ------------------------------ titles --------------------------------

# The search string is echoed back into the form's text field.  It comes
# straight from the request, so it must be HTML-escaped before it is placed
# inside the value="..." attribute; otherwise a crafted query could close
# the attribute and inject markup or script into the page.
my $form_value = defined($input{"string"}) ? $input{"string"} : "";
$form_value =~ s/&/&amp;/g;     # must come first, the other entities contain "&"
$form_value =~ s/</&lt;/g;
$form_value =~ s/>/&gt;/g;
$form_value =~ s/"/&quot;/g;

# Emit the HTTP header, the page head and the search form.
print <<EOF
Content-type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> 
<HTML>

<HEAD>
<TITLE>Search Toomas Tamm's Photo Website</TITLE>
<META name="description"
content="Local search - no fancy features.">

<script type="text/javascript">

  var _gaq = _gaq || [];
  _gaq.push(['_setAccount', 'UA-16306848-1']);
  _gaq.push(['_trackPageview']);

  (function() {
    var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
    ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
  })();

</script>
</HEAD>
<BODY background="bg-ffffe7.gif" text="#000000">

<TABLE border=1 cellspacing=0 cellpadding=0 width="100%" bgcolor="#FFFFFF"><tr><td>
<TABLE cellspacing=0 cellpadding=0 width="100%" bgcolor="#FFFFFF"><tr><td align=left>
&nbsp;<A HREF="http://photo.toomastamm.eu/" target="_top">Toomas's Photo Website</A>

<IMG SRC="arrow1.gif" HEIGHT=8 WIDTH=16 ALT="-&gt;"> Search
</td><td> </td><td align=right>
&nbsp;
</td></tr></table></table>

<P>
<font size=7 color="#00AA77" face="sans-serif"><b>
Search Toomas Tamm's Photo Website
</b></font><br>

<P>

<HR>

<P>
<form  method=GET action="${this_script_url}">
<table cellspacing=0 cellpadding=0>
<tr><td><font face="sans-serif"><b>Search for:</b></font></td>
<td><INPUT name="string" size=50 value="$form_value"></td></tr>
<tr><td> </td><td><INPUT TYPE="submit" VALUE="Submit">
</table>
</form>
<P>
The search engine looks for exact match for the string you typed. Since this is
a small site, it is a good idea to search for single words rather than phrases.
<BR>
You may also check out the <A HREF="site-map.html" target="_top">site map</A>.
<HR>

EOF
    ;

# Get a timestamp for logging.  localtime() reports the year as an offset
# from 1900, so adding 1900 gives the full four-digit year (this stays
# correct after the year 2000).
($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime;
$year+=1900;

$locked=0;       # becomes 1 when the search index is being rebuilt
$n_results=0;    # number of documents listed; also written to the log

if ($have_input) {
    # Normalise the search string.  ReadParse joins repeated parameters
    # with "\0", so only the first value is used.  Leading and trailing
    # white space is removed; some people have it for unknown reasons.
    $input{"string"} = "" unless defined $input{"string"};
    $input{"string"} =~ s/\0.*//s;
    $input{"string"} =~ s/^\s+//;
    $input{"string"} =~ s/\s+$//;

    # HTML-escaped copy of the search string.  The string comes from the
    # request, so the raw value must never be printed into the page.
    my $html_string = $input{"string"};
    $html_string =~ s/&/&amp;/g;    # first, the other entities contain "&"
    $html_string =~ s/</&lt;/g;
    $html_string =~ s/>/&gt;/g;
    $html_string =~ s/"/&quot;/g;

    if ($input{"string"} eq "") {
        print "<P><B>No search string specified. Please try again.</B>\n<P><HR>\n";
    } elsif ( -f $lockfile) {
        # The lock file exists while the index is being rebuilt.
        print <<EOA
		<P><b>Sorry!</B><P>
		The search index is being rebuilt. This normally takes about twenty seconds.<BR>
		Please try your search again. <P>
		If this message persists, please <A HREF="http://www.toomastamm.eu/contact" target="_top">contact Toomas Tamm</A>.
                <P><HR>
EOA
            ;
        $locked=1;
    } else {
        print "<P><H2>Search results for \"",$html_string,"\"</H2><P>\n";

        $GLIMPSE="/home/toomas/soft/glimpse/glimpse-4.1/bin/glimpse";
        $GLIMPSE_FLAGS="-H /home/toomas/public_html/misc/search_data -iyUck";
        # -H : directory location
        # -i : ignore case
        # -y : assume "yes" response to all questions
        # -U : print URL
        # -c : print count only
        # -k : disallow meta characters (& regexps as a result)

        # Run glimpse through the list form of a pipe open.  The program and
        # each argument are handed over as separate words and no shell is
        # involved, so quotes or other shell meta-characters in the search
        # string cannot break out of the command.
        my @glimpse_args = split(' ', $GLIMPSE_FLAGS);
        # A pattern that begins with "-" would be read as an option.
        # NOTE(review): "-e" is taken from the Glimpse manual as the way to
        # mark the next argument as the pattern -- confirm against the
        # installed glimpse 4.1.
        push @glimpse_args, "-e" if $input{"string"} =~ /^-/;
        push @glimpse_args, $input{"string"};

        if (! open (RES, "-|", $GLIMPSE, @glimpse_args)) {
            print "Command failed: <tt>",$GLIMPSE," ",$GLIMPSE_FLAGS," '",$html_string,"'</TT><BR><HR>\n";
            exit;
        }

        @urls=();
        @titles=();
        @counts=();
        while (defined(my $line = <RES>)) {
            # Expected line format: "<file> <url> <title>: <count>".
            # The title is optional.  Lines that do not fit the format, or
            # that report no matches, could never be listed below, so they
            # are not counted either.
            next unless $line =~ /^[^\s]* ([^\s]*)(.*): ([0-9]+)$/;
            my ($url, $title, $count) = ($1, $2, $3);
            next unless $count > 0;

            $title =~ s/^ //;      # separator between URL and title
            $title = $url if $title eq "";
            $title =~ s/\\:/:/g;   # Glimpse escapes colons
            $title =~ s/<BR>/ /g;  # Some titles contain line breaks

            push @urls,$url;
            push @titles,$title;
            push @counts,$count;
        }
        close RES;
        $n_results = scalar @urls;

        if ($n_results <= 0) {
            print "No documents on this website match the query<BR>\n";
            print "Exact match for the string specified is required. You may try using a shorter or different search string.<P><HR>\n";
        } else {
            print "<P>$n_results document";
            print "s" if $n_results > 1;
            print " matched the query. Best matches are listed first.<P>\n<UL>\n";

            # Highest match count first; documents with equal counts keep
            # the order in which glimpse reported them.
            my @order = sort { $counts[$b] <=> $counts[$a] || $a <=> $b } 0 .. $#counts;
            foreach my $idx (@order) {
                print "<LI><A HREF=\"",$urls[$idx],"\" target=\"_top\">",$titles[$idx],"</a>\n";
                print " <font size=-1>(",$counts[$idx]," match";
                print "es" if $counts[$idx] > 1;
                print ")</font>\n";
            }
            print "</ul><p><hr>\n";
        }
    }
}

# ------------------------------ logging -------------------------------
# One line per request: a search is logged with its string and result count,
# a plain visit to the form is logged with the referring URL.  Control
# characters are replaced by spaces so that request data cannot start a
# new (forged) line in the log.
$host=$FROM_HOST;
if ($host eq "128.214.176.27") { $host="(myself)" };

my $log_detail;
if ($have_input) {
    (my $log_string = $input{"string"}) =~ tr/\x00-\x1f\x7f/ /;
    $log_detail = sprintf("'%s': %d match(es)%s", $log_string, $n_results, $locked ? " LOCKED!" : "");
} else {
    (my $log_referer = defined($FROM_URL) ? $FROM_URL : "") =~ tr/\x00-\x1f\x7f/ /;
    $log_detail = sprintf("----- %s", $log_referer);
}

# Logging is best effort: if the log cannot be opened the page is still
# delivered, the entry is simply skipped.
if (open (LOG, ">>", $logfile)) {
    printf (LOG "%4d/%02d/%02d %02d:%02d:%02d %-16s %s\n",$year,$mon+1,$mday,$hour,$min,$sec,$host,$log_detail);
    close (LOG);
}

# Page footer.  The text contains nothing to interpolate, so it is written
# as a literal here-document.
print <<'FOOTER';
<DIV align="right">
Search engine made possible by <A HREF="http://glimpse.cs.arizona.edu" target="_top">Glimpse</A>.</div>
<hr><center>
<table border cellspacing=1 cellpadding=2 bgcolor="#FFFFFF">
<tr>
<td>&nbsp;&nbsp;<font face="sans-serif"><A HREF="http://photo.toomastamm.eu/" target="_top">Home</A></font>&nbsp;&nbsp;</td>
</tr>
</table>
</center>
<table cellspacing=0 cellpadding=0 width="100%"><tr>
<td align=left><font size=-1><i>Last modified: 11 June 1998</i></font><br></td>
<td> </td>
<td align=right><img src="/~toomas/misc/ttw3.png" alt="(e-mail)" height=12 width=90></td>
</tr></table>
<font size="-2"><a HREF="http://validator.w3.org/check/referer" target="_top">validate</a></font>
</BODY>
</HTML>

FOOTER

# Run the external AccessLog/logger program with the argument "local_search";
# everything it prints is discarded.
system("/home/toomas/private_html/AccessLog/logger local_search > /dev/null 2>&1");

# Stop here: what follows in the file are only subroutine definitions.
exit(0);

#!/usr/local/bin/perl -- -*- C -*-

# Perl Routines to Manipulate CGI input
# S.E.Brenner@bioc.cam.ac.uk
# $Header: /cys/people/brenner/http/cgi-bin/RCS/cgi-lib.pl,v 1.8 1995/04/07 21:35:29 brenner Exp $
#
# Copyright 1994 Steven E. Brenner  
# Unpublished work.
# Permission granted to use and modify this library so long as the
# copyright above is maintained, modifications are documented, and
# credit is given for any use of the library.
#
# Thanks are due to many people for reporting bugs and suggestions
# especially Meng Weng Wong, Maki Watanabe, Bo Frese Rasmussen,
# Andrew Dalke, Mark-Jason Dominus and Dave Dittrich.

# For more information, see:
#     http://www.bio.cam.ac.uk/web/form.html       
#     http://www.seas.upenn.edu/~mengwong/forms/   

# Minimalist http form and script (http://www.bio.cam.ac.uk/web/minimal.cgi):
#
# require "cgi-lib.pl";
# if (&ReadParse(*input)) {
#    print &PrintHeader, &PrintVariables(%input);
# } else {
#   print &PrintHeader,'<form><input type="submit">Data: <input name="myfield">';
#}

# ReadParse
# Reads in GET or POST data, converts it to unescaped text, and puts
# one key=value in each member of the list "@in"
# Also creates key/value pairs in %in, using '\0' to separate multiple
# selections
#
# Parameters:
#  If a variable-glob parameter (e.g., *cgi_input) is passed to ReadParse,
#  information is stored there, rather than in $in, @in, and %in.
#
# Returns the length of the raw input, i.e. TRUE if there was input and
# FALSE (0) if there was no input.
# UNDEF may be used in the future to indicate some failure.
#
# Now that cgi scripts can be put in the normal file space, it is useful
# to combine both the form and the script in one place.  If no parameters
# are given (i.e., ReadParse returns FALSE), then a form could be output.
#
# Decoding notes:
#  Only well-formed %XX escapes (a percent sign followed by two hex digits)
#  are decoded.  A stray "%" is kept as typed instead of being turned into
#  a NUL byte.  A key given without "=value" gets the empty string.

sub ReadParse {
  local (*in) = @_ if @_;
  my ($key, $val);
  my $method = defined($ENV{'REQUEST_METHOD'}) ? $ENV{'REQUEST_METHOD'} : '';

  # Read in text
  if ($method eq "GET") {
    $in = $ENV{'QUERY_STRING'};
  } elsif ($method eq "POST") {
    read(STDIN,$in,$ENV{'CONTENT_LENGTH'});
  }
  $in = '' unless defined($in);   # no request method / nothing to read

  @in = split(/&/,$in);

  foreach my $pair (@in) {
    # Convert plus's to spaces (done in place, so @in keeps the result)
    $pair =~ s/\+/ /g;

    # Split into key and value.
    ($key, $val) = split(/=/,$pair,2); # splits on the first =.
    $key = '' unless defined($key);
    $val = '' unless defined($val);

    # Convert %XX from hex numbers to the byte they stand for.  "C" is the
    # unsigned format, so values above 0x7F are packed without wrapping.
    $key =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;
    $val =~ s/%([0-9A-Fa-f]{2})/pack("C",hex($1))/ge;

    # Associate key and value
    $in{$key} .= "\0" if (defined($in{$key})); # \0 is the multiple separator
    $in{$key} .= $val;
  }

  return length($in);
}

# PrintHeader
# Returns the HTTP header that announces an HTML document: the
# Content-type line followed by the empty line that ends the header.

sub PrintHeader {
  my $media_type = "text/html";
  return "Content-type: " . $media_type . "\n\n";
}

# MethGet
# Tells whether this cgi call was made with the GET request method:
# true if the REQUEST_METHOD environment variable is "GET", false otherwise

sub MethGet {
  my $request_method = $ENV{'REQUEST_METHOD'};
  return ($request_method eq "GET");
}

# MyURL
# Returns a URL to the script, built as "http://" followed by the
# SERVER_NAME and SCRIPT_NAME environment variables
sub MyURL  {
  my @parts = ('http://', $ENV{'SERVER_NAME'}, $ENV{'SCRIPT_NAME'});
  return join('', @parts);
}

# CgiError
# Prints out an error message which contains appropriate headers,
# markup, etcetera.
# Parameters:
#  If no parameters, gives a generic error message that names this script
#  Otherwise, the first parameter will be the title (used for both the
#  <title> and the <h1> heading) and the rest will be given as different
#  paragraphs of the body
# The parameters are printed as given, without any HTML escaping.

sub CgiError {
  my @paragraphs = @_;

  # No arguments at all: fall back to the generic message.
  unless (@paragraphs) {
    my $script_url = &MyURL();
    @paragraphs = ("Error: script $script_url encountered fatal error");
  }

  # The first element is the title, whatever remains forms the body.
  my $title = shift(@paragraphs);

  print &PrintHeader();
  print "<html><head><title>$title</title></head>\n";
  print "<body><h1>$title</h1>\n";
  foreach my $paragraph (@paragraphs) {
    print "<p>$paragraph</p>\n";
  }
  print "</body></html>\n";
}

# PrintVariables
# Nicely formats variables in an associative array passed as a parameter
# And returns the HTML string.
# Parameters:
#  A list of key/value pairs (e.g. %in).  A value that holds several
#  selections separated by "\0" produces one entry per selection.
# Returns:
#  A <DL COMPACT> list with the keys in sorted order; newlines inside a
#  value are shown as <BR>.
# Keys and values are inserted as given, without HTML escaping.
#
# The old multi-line matching switch $* is no longer touched: it is a fatal
# error in Perl 5.30 and later, and the newline substitution below does
# not use any anchors, so it never depended on it.

sub PrintVariables {
  my (%in) = @_;
  my $output = "<DL COMPACT>";
  foreach my $key (sort keys(%in)) {
    foreach my $value (split(/\0/, $in{$key})) {
      (my $out = $value) =~ s/\n/<BR>/g;
      $output .=  "<DT><B>$key</B><DD><I>$out</I><BR>";
    }
  }
  $output .=  "</DL>";

  return $output;
}

# PrintVariablesShort
# Nicely formats variables in an associative array passed as a parameter
# Using one line per pair (unless value is multiline)
# And returns the HTML string.
# Parameters:
#  A list of key/value pairs (e.g. %in).  A value that holds several
#  selections separated by "\0" produces one line per selection.
# Returns:
#  One "<B>key</B> is <I>value</I><BR>" entry per pair, keys in sorted
#  order, newlines inside a value shown as <BR>; the empty string when no
#  pairs are given.
# Keys and values are inserted as given, without HTML escaping.
#
# The old multi-line matching switch $* is no longer touched: it is a fatal
# error in Perl 5.30 and later, and the newline substitution below does
# not use any anchors, so it never depended on it.

sub PrintVariablesShort {
  my (%in) = @_;
  my $output = "";
  foreach my $key (sort keys(%in)) {
    foreach my $value (split(/\0/, $in{$key})) {
      (my $out = $value) =~ s/\n/<BR>/g;
      $output .= "<B>$key</B> is <I>$out</I><BR>";
    }
  }

  return $output;
}

1; # return true (a file loaded with "require" must end in a true value; not reached here, the script calls exit before this point)

