#!/usr/bin/perl -w

# TODO:
#   Droplists show total of each writer/universe/title.
#   Javascript to empty a droplist if another is selected.

##########################################################################
# Modules and globals.
##########################################################################

use strict;
use CGI qw/:standard/;

# Occurrence counts gathered from the index file, keyed by canonical name.
our (%titles, %authors, %universes);

# Alias => canonical-name tables loaded from the map-* files.
our (%universe_map, %author_map, %title_map);

# Archive directory (with trailing slash) and the index file inside it.
our ($directory, $index);

$directory = "/work/ftp/pub/superguy/";
$index     = $directory . 'Superguy.Index.complete.txt';

%universe_map = read_mappings($directory . 'map-universes');
%author_map   = read_mappings($directory . 'map-authors');
%title_map    = read_mappings($directory . 'map-titles');

##########################################################################
# Reading the mappings
##########################################################################

# read_mappings($fname)
#
# Reads a tab-separated mapping file and returns a hash of
# alias => canonical name.  Each usable line has the form
# "canonical<TAB>alias"; when the canonical column is empty the line
# inherits the canonical name of the most recent line that had one.
# Lines that do not match that shape are ignored.
#
# Dies if the file cannot be opened or closed.
sub read_mappings {
    my ($fname) = @_;
    my ($lastmatch, %map);

    open(my $fh, '<', $fname)
        or die "Could not open mappings $fname: $!\n";

    while (my $line = <$fh>) {
        chomp $line;
        my ($canonical, $alias) = ($line =~ /^([^\t]*)\t+(.+)$/);

        # Test definedness/length rather than truth so that a name of
        # "0" is not mistaken for a missing field.
        next unless defined $alias && length $alias;

        if (length $canonical) {
            $lastmatch = $canonical;
        }
        else {
            $canonical = $lastmatch;
        }

        # A continuation line before any canonical name has nothing to
        # map to; skip it instead of storing undef.
        next unless defined $canonical;

        $map{$alias} = $canonical;
    }

    close($fh) or die "Could not close mappings $fname: $!\n";
    return %map;
}

##########################################################################
# File parsing
##########################################################################

# clean_title($title)
#
# Returns $title with trailing decoration removed, in this order:
# trailing whitespace, a " part X" suffix (X being any single
# character), a " (n/m)" suffix, and a trailing "#something" token.
sub clean_title {
    my ($title) = @_;

    $title =~ s#\s+$##;
    $title =~ s# part .$##;
    $title =~ s#\s+\(\d+/\d+\)$##;
    $title =~ s#\s*\#\S+$##;

    return $title;
}

# _tally(\%counts, \%map, $key)
#
# Increments the counter for $key, or for its canonical form when
# $key appears in the alias map.
sub _tally {
    my ($counts, $map, $key) = @_;

    $key = $map->{$key} if exists $map->{$key};
    $counts->{$key}++;
    return;
}

# parse_indexes($index)
#
# Reads the index file and fills the global %titles, %universes and
# %authors hashes with occurrence counts.  Header lines (starting with
# "Vol" or "---") and lines that do not match the expected column
# layout are skipped.
#
# Dies if the file cannot be opened or closed.
sub parse_indexes {
    my ($index) = @_;

    open(my $fh, '<', $index)
        or die "Could not open index file $index: $!\n";

    while (my $line = <$fh>) {
        next if $line =~ /^(?:Vol|---)/;
        next unless $line =~ /^(\d*)\s+(\S+)\s+(\S+)\s+(.{37})\s+(.+)$/;

        # Columns 1 and 2 (volume and date) are matched but not needed.
        my ($universe, $title, $author) = ($3, $4, $5);

        # Add the title, or its canonical form.  We want to skip any
        # titles for the Author's Altiverse or Administrivia -- way too
        # many.  Clean the title both before and after splitting, to
        # kill embedded junk.
        $title = clean_title($title);
        if ($universe ne 'AA' && $universe ne 'AD') {
            foreach my $part (split m{\s*/\s*}, $title) {
                _tally(\%titles, \%title_map, clean_title($part));
            }
        }

        # Add the universe, or its canonical form.
        _tally(\%universes, \%universe_map, $universe);

        # Add the author(s) in their canonical form.  We split on a /,
        # since that designates multiple authors, and strip off any
        # editor tags.
        foreach my $name (split m{/}, $author) {
            $name =~ s#\s+$##;
            $name =~ s#\s+\(ed\.\)$##;
            _tally(\%authors, \%author_map, $name);
        }
    }

    close($fh) or die "Could not close index file $index: $!\n";
    return;
}

##########################################################################
# Main program.
##########################################################################

parse_indexes($index);

# Find first and last log for variables later on.  Take the numeric
# minimum and maximum over every log file found, so the result does not
# depend on glob ordering and stays defined when there are zero or one
# matching files.
my @lognumbers = sort { $a <=> $b }
                 map  { /sguy0*(\d+)/ ? $1 : () }
                 glob("${directory}sguy*[0-9] ${directory}sguy*.gz");
my $log_lower = @lognumbers ? $lognumbers[0]  : '';
my $log_upper = @lognumbers ? $lognumbers[-1] : '';

my $query = CGI->new;
$query->autoEscape(undef);

print $query->header;
print $query->start_html(
    -title  => 'The Superguy Autocollector',
    -author => 'lennox@cs.columbia.edu',
    -style  => { 'src' => 'http://www.eyrie.org/superguy/autocollect.css' },
);

print h1('The Superguy Autocollector');

# NOTE(review): the three-piece concatenation suggests the middle piece
# was once wrapped in a link; the markup is not present in the source.
print p('Welcome to the ' . 'Superguy ' . 'Automatic Collection generator!');

# start_form/end_form replace startform/endform, which current CGI.pm
# no longer provides.
print $query->start_form(-method => 'Get', -action => 'superguy-srv');

print p('Search for posts matching pattern: ');

my %patterns = (
    'string'     => ' string',
    'casestring' => ' case insensitive string',
    'regexp'     => ' regular expression',
);

print p(
    { -class => 'search' },
    $query->textfield('pattern', '', 30)
        . br
        . 'using '
        . $query->radio_group('searchtype',
                              ['string', 'casestring', 'regexp'],
                              'casestring', 0, \%patterns)
        . ' matching'
);

# Search by author, universe or title.  Choosing an entry in one
# droplist resets the other two to their placeholder entry.
print p('OR ');
print p(
    { -class => 'search' },
    popup_menu(
        -name     => 'search_author',
        -values   => [ 'Search by Author', sort keys %authors ],
        -default  => 'Search by Author',
        -onchange => 'search_universe.options[0].selected = true;search_title.options[0].selected = true;',
    )
    . popup_menu(
        -name     => 'search_universe',
        -values   => [ 'Search by Universe', sort keys %universes ],
        -default  => 'Search by Universe',
        -onchange => 'search_author.options[0].selected = true;search_title.options[0].selected = true;',
    )
    . popup_menu(
        -name     => 'search_title',
        -values   => [ 'Search by Title', sort keys %titles ],
        -default  => 'Search by Title',
        -onchange => 'search_universe.options[0].selected = true;search_author.options[0].selected = true;',
    )
);

print p('in logs '
        . $query->textfield('start', $log_lower, 5)
        . ' through '
        . $query->textfield('end', $log_upper, 5));

print p($query->checkbox('index', undef, 'on',
                         ' Show index entries instead of post contents.'));

print p($query->submit('Search'));

print $query->end_form;

# Static footer.  NOTE(review): the HTML tags of this section are not
# present in the source; the paragraph/address markup below is a minimal
# reconstruction and any hyperlinks still need to be restored.  The text
# itself is unchanged.
print <<"END";
<p>The collections the Autocollector generates are created from the Superguy archives, which are also available by FTP at ftp://ftp.eyrie.org/pub/superguy/. or HTTP at http://archives.eyrie.org/superguy/.</p>

<p>The Autocollector selects the posts to output by performing the appropriate sort of matching on the complete index. If your search didn't find a post you were expecting to find, you might want to browse that file to find out what is wrong.</p>

<p>All material extracted by the Autocollector is copyright its respective Authors. See the Superguy FAQ for details.</p>

<address>
Jonathan Lennox<br>
lennox\@cs.columbia.edu<br>
April 3, 1996
</address>

END

print $query->end_html;