#!/usr/bin/perl -w

# TODO:
#        Droplists show total of each writer/universe/title.
#        Javascript to empty a droplist if another is selected.

##########################################################################
# Modules and globals.
##########################################################################

use CGI qw/:standard/;

# Package globals shared by the subs below and the main program:
#   %titles, %authors, %universes          - tallies filled in by
#                                            parse_indexes
#   %universe_map, %author_map, %title_map - alias => canonical lookups
#   $directory, $index                     - the archive directory and the
#                                            index file inside it
# The words in a qw() list are separated by whitespace only; a comma would
#  become part of the variable name being declared.
use vars qw (%titles %authors %universes %universe_map
    %author_map %title_map $directory $index);

$directory = "/work/ftp/pub/superguy/";
$index     = $directory . 'Superguy.Index.complete.txt';

%universe_map = read_mappings($directory.'map-universes');
%author_map   = read_mappings($directory.'map-authors');
%title_map    = read_mappings($directory.'map-titles');

##########################################################################
# Reading the mappings
##########################################################################

# Read an alias mapping file and return it as alias => canonical pairs.
#
# A useful line has the form "canonical<TAB>alias".  A line whose canonical
#  field is empty (it starts with a tab) adds another alias for the most
#  recent canonical name seen.  Lines without a tab-separated alias are
#  ignored, as is an alias that appears before any canonical name.
#
#   $fname  - path of the mapping file to read.
#   Returns - a flat list of alias/canonical pairs, ready to assign to a hash.
#   Dies    - if the file cannot be opened or closed.
sub read_mappings {
    my ($fname) = @_;
    my ($lastmatch, %map);

    # Three-argument open with a lexical handle: the file name is used
    #  exactly as given (no mode characters, no whitespace trimming) and the
    #  handle cannot collide with a MAP handle used anywhere else.
    open (my $fh, '<', $fname)
        or die "Could not open mappings $fname: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        my ($canonical, $alias) = ($line =~ /^([^\t]*)\t+(.+)$/);
        next unless defined $alias;

        # Test the length, not the truth, so that a name of "0" still counts.
        if (length $canonical) { $lastmatch = $canonical }
        else                   { $canonical = $lastmatch }

        # A continuation line with no canonical name before it has nothing
        #  to map to.
        next unless defined $canonical;
        $map{$alias} = $canonical;
    }
    close ($fh) or die "Could not close mappings $fname: $!\n";

    return %map;
}

##########################################################################
# File parsing
##########################################################################

# Strip trailing decoration from a title and return what is left.  Removed
#  from the end of the string, one after the other: trailing whitespace,
#  " part X" (X being any single character), a " (n/m)" counter, and a
#  "#..." tag together with any whitespace in front of it.
sub clean_title {
    my $title = shift;
    foreach my $junk (qr{\s+$}, qr{ part .$}, qr{\s+\(\d+/\d+\)$}, qr{\s*\#\S+$}) {
        $title =~ s/$junk//;
    }
    return $title;
}

# Tally the titles, universes and authors listed in an index file.
#
# Every line that looks like an index entry (volume, date, universe, a
#  37-column title field, then the author field) adds to the global
#  %titles, %universes and %authors counters.  A name that has an entry in
#  %title_map, %universe_map or %author_map is counted under the mapped
#  name instead.  Header lines starting with "Vol" or "---" are skipped.
#
#   $index - path of the index file to read.
#   Dies   - if the index file cannot be opened or closed.
sub parse_indexes {
    my ($index) = @_;
    open (my $fh, '<', $index)
        or die "Could not open index file $index: $!\n";
    while (my $line = <$fh>) {
        next if $line =~ /^(?:Vol|---)/;
        next unless $line =~ /^\d*\s+\S+\s+(\S+)\s+(.{37})\s+(.+)$/;
        my ($universe, $title, $author) = ($1, $2, $3);

        # Add the title, or its canonical form.  We want to skip any
        #  titles for the Author's Altiverse or Administrivia -- way too
        #  many.  Clean the title both before and after splitting, to
        #  kill embedded junk.
        $title = clean_title($title);
        if ($universe ne 'AA' && $universe ne 'AD') {
            foreach my $part (split /\s*\/\s*/, $title) {
                $part = clean_title($part);
                next unless length $part;   # e.g. a title starting with "/"
                my $name = exists $title_map{$part} ? $title_map{$part}
                                                    : $part;
                $titles{$name}++;
            }
        }

        # Add the universe, or its canonical form.
        my $canonical_universe = exists $universe_map{$universe}
                               ? $universe_map{$universe}
                               : $universe;
        $universes{$canonical_universe}++;

        # Add the author(s) in their canonical form.  A / separates
        #  multiple authors; the whitespace around it is dropped, just as
        #  for titles, so that "A / B" yields "B" and not " B".  Editor
        #  tags are stripped as well.
        foreach my $writer (split /\s*\/\s*/, $author) {
            $writer =~ s#\s+$##;
            $writer =~ s#\s+\(ed\.\)$##;
            next unless length $writer;
            my $name = exists $author_map{$writer} ? $author_map{$writer}
                                                   : $writer;
            $authors{$name}++;
        }
    }
    close ($fh) or die "Could not close index file $index: $!\n";
}

##########################################################################
# Main program.
##########################################################################

parse_indexes($index);

# Find the lowest and highest log numbers on disk for variables later on.
#  The two glob patterns are expanded one after the other and each is
#  sorted as text, so the first and last names returned are not necessarily
#  the numeric extremes; pull the number out of every name and sort those.
#  Both bounds are left empty when no log file is found.
my @logfilesfound = glob("$directory/sguy*[0-9] $directory/sguy*.gz");
my @lognumbers    = sort { $a <=> $b }
                    map  { /sguy0*(\d+)/ ? $1 : () } @logfilesfound;
my ($log_lower, $log_upper) = @lognumbers ? @lognumbers[0, -1] : ('', '');

# Emit the HTTP header, the top of the page and the search form.  The form
#  is submitted with GET to superguy-srv.
my $query = CGI->new;

print $query->header;
print $query->start_html(-title  => 'The Superguy Autocollector',
                         -author => 'lennox@cs.columbia.edu',
                         -style  => { 'src' => 'http://www.eyrie.org/superguy/autocollect.css' },
                        );
print h1('The Superguy Autocollector');
print p('Welcome to the '.
        '<a href="http://www.eyrie.org/superguy/">Superguy</a> '.
        'Automatic Collection generator!');

# start_form/end_form are the supported spellings; the older
#  startform/endform aliases are deprecated and missing from recent CGI.pm
#  releases.
print $query->start_form('Get', 'superguy-srv');
print p('Search for posts matching pattern: ');

my %patterns = ('string'     => ' string',
                'casestring' => ' case insensitive string',
                'regexp'     => ' <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">regular expression</a>',
               );

# One of the radio labels carries a hand-written link, so escaping has to
#  be off while the group is rendered.  It is switched straight back on:
#  CGI.pm text fields re-display whatever the request supplied for them,
#  and with escaping off that request data would reach the page as raw HTML.
$query->autoEscape(undef);
my $searchtypes = $query->radio_group('searchtype',
                                      ['string', 'casestring', 'regexp'],
                                      'casestring', 0, \%patterns);
$query->autoEscape(1);

print p( {-class => 'search'},
        $query->textfield('pattern', '', 30) . br . 'using ' .
        $searchtypes . ' matching');

# Search by author, universe or title instead.  Picking an entry in one
#  droplist resets the other two to their first ("Search by ...") entry.
print p('OR ');
print p( {-class => 'search'},
        popup_menu(-name     => 'search_author',
                   -values   => [ 'Search by Author', sort keys %authors ],
                   -default  => 'Search by Author',
                   -onchange => 'search_universe.options[0].selected = true;search_title.options[0].selected = true;',) .
        popup_menu(-name     => 'search_universe',
                   -values   => [ 'Search by Universe', sort keys %universes ],
                   -default  => 'Search by Universe',
                   -onchange => 'search_author.options[0].selected = true;search_title.options[0].selected = true;',) .
        popup_menu(-name     => 'search_title',
                   -values   => [ 'Search by Title', sort keys %titles ],
                   -default  => 'Search by Title',
                   -onchange => 'search_universe.options[0].selected = true;search_author.options[0].selected = true;',));

print p('in logs ' . $query->textfield('start',$log_lower,5) .
        ' through ' . $query->textfield('end',$log_upper,5));
print p($query->checkbox('index', undef, 'on',
                         ' Show index entries instead of post contents.'));

print p($query->submit('Search'));
print $query->end_form;

# Closing boilerplate: where the archives can be fetched, how the matching
#  is done, the copyright notice and the author's signature.  The text is
#  held in a variable first and then printed, followed by the end of the
#  HTML document.
my $footer = <<END;
<p>
    The collections the Autocollector generates are created from the Superguy
    archives, which are also available by FTP at
    <a href="ftp://ftp.eyrie.org/pub/superguy/">ftp://ftp.eyrie.org/pub/superguy/</a>.
    or HTTP at
    <a href="http://archives.eyrie.org/superguy/">http://archives.eyrie.org/superguy/</a>.
</p>
<p>
    The Autocollector selects the posts to output by performing the
    appropriate sort of matching on the
    <a href="http://archives.eyrie.org/superguy/Superguy.Index.complete.txt">complete index</a>.
    If your search didn't find a post you were expecting to find, you might
    want to browse that file to find out what is wrong.
</p>
<p>
    All material extracted by the Autocollector is copyright its respective
    Authors.  See the
    <a href="http://archives.eyrie.org/superguy/TEB/Frequently-Asked-Questions.gz">Superguy FAQ</a>
    for details.
</p>
<p>
    <em>Jonathan Lennox</em><br>
    <em>
        <a href="mailto:lennox\@cs.columbia.edu">lennox\@cs.columbia.edu</a>
    </em><br>
    <em>April 3, 1996</em>
</p>
END

print $footer;
print $query->end_html;