#!/usr/bin/perl
$ID = q$Id: filter-syslog,v 1.20 2007-04-14 04:17:25 eagle Exp $;
#
# filter-syslog -- Filters a syslog file and mails the results.
#
# Written by Russ Allbery
# Copyright 2002, 2003, 2004, 2006, 2007
#     Board of Trustees, Leland Stanford Jr. University
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.

##############################################################################
# Site configuration
##############################################################################

# filter-syslog looks for filter-syslog.conf in the following directories.
# /etc/leland is included for backward-compatibility reasons at Stanford.
@CONFIGDIR = qw(/etc /etc/leland);

# Log parsing regexes.  These do the basic parse of the log line and any line
# that fails even this basic parse, except for a few specially recognized
# variations, will be resent to the reporting address.  Right now, these
# regexes are constrained by having to return the timestamp as $1, the
# hostname as $2, the ID (the program name) as $3, and the message itself as
# $4.
@REGEXES = (
    qr/^(\S+\s*\S+\s*\S+) (\S+) (\S+): (?:\[\S+ \d+ \S+\] )?(.*)/,
    qr/^(\S+\s*\S+\s*\S+) \S+ Message forwarded from (\S+): (\S+): (.*)/
);

##############################################################################
# Modules and declarations
##############################################################################

require 5.006;

use strict;
use vars qw(%CONFIG @CONFIGDIR $ID @REGEXES);

use Getopt::Long qw(GetOptions);
use Sys::Hostname qw(hostname);

##############################################################################
# Configuration parsing
##############################################################################

# Parse an individual configuration file.  Calls itself recursively to handle
# included files or directories.  Note that there is currently no defense
# against recursive inclusion of files other than a simple check that a file
# doesn't include itself.
#
# $file - Path of the configuration file to read
#
# Fills in %CONFIG: variable settings are stored under their lowercased name,
# "*" filter regexes are appended to $CONFIG{global}, and per-program filter
# regexes are appended to $CONFIG{ignore}{PROGRAM}.
#
# Dies if the file or an included directory cannot be opened or if a file
# includes itself.  Warns (and continues) on an unrecognized line or a
# variable with no value.
sub config_parse_file {
    my ($file) = @_;

    # Lexical handles, so that recursive calls for included files cannot
    # disturb the handle of the file that included them.
    open (my $conf, '<', $file) or die "$0: cannot open $file: $!\n";
    while (defined (my $line = <$conf>)) {
        chomp $line;
        next if $line =~ /^\s*\#/;
        next if $line =~ /^\s*$/;

        # Capture the location now.  $. tracks the most recently read handle,
        # so it is no longer trustworthy once an included file has been read.
        my $where = "$0:$file:$.";

        if ($line =~ /^\s*(\S+)\s*=\s*(.*)/) {
            my ($param, $value) = (lc ($1), $2);
            unless ($value) {
                warn "$where: parse error, no value for variable\n";
            }
            $CONFIG{$param} = $value;
        } elsif ($line =~ m%^\s*include\s+(\S+)\s*$%) {
            my $inc = $1;
            my @files;
            if (-d $inc) {
                opendir (my $dir, $inc)
                    or die "$where: cannot open directory $inc: $!\n";

                # Skip every entry whose name contains a period.  This drops
                # "." and ".." as well as dotfiles and names like foo.bak.
                @files = map { "$inc/$_" } grep { !/\./ } readdir $dir;
                closedir $dir;
            } else {
                @files = ($inc);
            }
            for my $included (@files) {
                if ($included eq $file) {
                    die "$where: config file recursively included\n";
                }
                config_parse_file ($included);
            }
        } elsif ($line =~ m%^\s*(\S+):\s*/(.*)/\s*$%) {
            my ($program, $regex) = ($1, $2);
            if ($program eq '*') {
                push (@{ $CONFIG{global} }, qr/$regex/);
            } else {
                push (@{ $CONFIG{ignore}{$program} }, qr/$regex/);
            }
        } else {
            warn "$where: parse error, unknown line\n";
        }
    }
    close $conf;
}

# Parse a configuration file and fill out the %CONFIG hash.  Also make sure
# that all the required configuration parameters are set.
#
# $file - Path of the top-level configuration file
#
# Warns if alert or subject is unset.  Dies if alert, sender, or subject
# contains a backslash or single quote; alert and sender are later placed
# inside single quotes on the sendmail command line, so such characters would
# allow arbitrary shell commands to be run.
sub config_parse {
    my $file = shift;
    $CONFIG{global} = [];
    config_parse_file ($file);
    for my $param (qw/alert subject/) {
        unless ($CONFIG{$param}) {
            warn "$0: parameter $param not set in $file\n";
        }
    }
    for my $param (qw/alert sender subject/) {
        next unless defined $CONFIG{$param};
        if ($CONFIG{$param} =~ /[\\\']/) {
            die "$0: $param setting must not contain \\ or '\n";
        }
    }

    # Only the first $h in the subject is replaced with the local hostname.
    $CONFIG{subject} =~ s/\$h/hostname/e if defined $CONFIG{subject};
}

##############################################################################
# Mail sending
##############################################################################

# Initialize mail sending.
# Opens the global MAIL handle that all report output is printed to.
#
# $nomail - If true, MAIL becomes a dup of STDOUT and no headers are written.
#           Otherwise MAIL is a pipe to sendmail, addressed to
#           $CONFIG{alert} (with $CONFIG{sender} as envelope sender if set),
#           and the From:, To:, and Subject: headers are written.
#
# Dies if the handle cannot be opened, or if alert or sender contains a
# backslash or single quote.
sub mail_init {
    my $nomail = shift;
    if ($nomail) {
        open (MAIL, '>&STDOUT') or die "$0: cannot dup stdout: $!\n";
        return;
    }

    # Both addresses are placed inside single quotes on a shell command line
    # below, so refuse anything that could break out of the quoting.
    for my $param (qw/alert sender/) {
        next unless defined $CONFIG{$param};
        if ($CONFIG{$param} =~ /[\\\']/) {
            die "$0: $param setting must not contain \\ or '\n";
        }
    }

    my ($command) = grep { -x $_ } qw(/usr/sbin/sendmail /usr/lib/sendmail);
    $command ||= '/usr/lib/sendmail';
    $command .= " -f '$CONFIG{sender}'" if $CONFIG{sender};
    $command .= " '$CONFIG{alert}'";
    open (MAIL, "| $command") or die "$0: unable to fork sendmail: $!\n";
    print MAIL "From: $CONFIG{sender}\n" if $CONFIG{sender};
    print MAIL "To: $CONFIG{alert}\nSubject: $CONFIG{subject}\n\n";
}

##############################################################################
# Implementation
##############################################################################

# Clean up $0 for error reporting.
my $fullpath = $0;
$0 =~ s%^.*/%%;

# Parse command-line options.  --hostname is accepted, but nothing below reads
# $hostname: reported lines are always printed whole, hostname field included.
my ($help, $hostname, $nomail, $version);
Getopt::Long::Configure ('bundling', 'no_ignore_case');
GetOptions ('help|h'     => \$help,
            'hostname|o' => \$hostname,
            'no-mail|n'  => \$nomail,
            'version|v'  => \$version) or exit 1;
if ($help) {
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $fullpath) or die "$0: can't fork: $!\n";
} elsif ($version) {
    # Turn the RCS Id string into "filter-syslog 1.20 (2007-04-14)".
    my $banner = join (' ', (split (' ', $ID))[1..3]);
    $banner =~ s/,v\b//;
    $banner =~ s/(\S+)$/($1)/;
    $banner =~ tr%/%-%;
    print $banner, "\n";
    exit 0;
}

# The path to the config file is the only argument.  Names that don't start
# with / or ./ are searched for in @CONFIGDIR; if not found there, the name is
# used as given.
my $config = shift || 'filter-syslog.conf';
unless ($config =~ m%^\.?/%) {
    for my $dir (@CONFIGDIR) {
        if (-f "$dir/$config") {
            $config = "$dir/$config";
            last;
        }
    }
}
config_parse ($config);

# Now, process our input.  We spit our output out through a pipe to sendmail
# if we find anything of note.  $sending records whether MAIL has been opened;
# $last records whether the previous log line was included in the report.
my ($sending, $last);

# Add some text to the report, opening the mail pipe on first use so that no
# mail at all is sent when every line is filtered out.
my $report = sub {
    my ($text) = @_;
    unless ($sending) {
        mail_init ($nomail);
        $sending = 1;
    }
    print MAIL $text;
};

LINE: while (<>) {
    if (/-- MARK --/) {
        $last = 0;
        next LINE;
    }

    # Ignore Linux syslogd restart messages, which don't follow a normal
    # format (they have the version number of syslogd after the name of the
    # program).
    if (/^(\S+\s*\S+\s*\S+) \S+ syslogd [\d.\#]+: restart\b/
        or /^(\S+\s*\S+\s*\S+) \S+ exiting on signal 15$/) {
        $last = 0;
        next LINE;
    }

    # Handle repeated messages by including the message repeated line if we
    # included the previous line.  $last is deliberately left alone so that a
    # run of these notices is treated the same way throughout.
    if (/^(\S+\s*\S+\s*\S+) \S+ (last message repeated \d+ times?)/) {
        $report->("$1 $2\n") if $last;
        next LINE;
    }

    # Parse the syslog line.  Any line that we can't parse is mailed, and
    # counts as an included line for any repeat notice that follows it.
    my ($timestamp, $host, $id, $line);
    for my $regex (@REGEXES) {
        if (/$regex/) {
            ($timestamp, $host, $id, $line) = ($1, $2, $3, $4);
            last;
        }
    }
    unless ($timestamp) {
        $report->($_);
        $last = 1;
        next LINE;
    }
    $line =~ s/\s+$//;

    # Check to see if we're ignoring this message, and if not, start a report
    # (if we haven't already) and include it in the report.  The program name
    # is the ID up to the first "[", i.e. without any PID.
    my ($program) = ($id =~ /^([^\[]+)/);
    my $specific = defined $program ? $CONFIG{ignore}{$program} : undef;
    for my $regex (@{ $CONFIG{global} }, @{ $specific || [] }) {
        if ($line =~ /$regex/) {
            $last = 0;
            next LINE;
        }
    }
    $report->($_);
    $last = 1;
}
if ($sending) {
    close MAIL;
    exit 0 if $nomail;
    die ("$0: sendmail exited with status ", ($? >> 8), "\n") if $? != 0;
}

##############################################################################
# Documentation
##############################################################################

=head1 NAME

filter-syslog - Filters a syslog file and mails the results

=head1 SYNOPSIS

filter-syslog [B<-hnov>] [I<config>] < I<syslog>

=head1 DESCRIPTION

B<filter-syslog> parses a log generated by syslog, filtering out all of the
boring lines as configured in I<config>, and then mails the remaining lines
to the address specified in I<config>.  It expects the log file on standard
input, and is designed to run from an analyze action in newsyslog(8),
although it can be used in other situations as well.
If I<config> isn't an absolute path, it's taken to be relative to either
F</etc> or F</etc/leland>, wherever the file is found (searched in that
order).  If I<config> is not specified, it defaults to F<filter-syslog.conf>
and is looked for in both F</etc> and F</etc/leland>.

Lines containing C<-- MARK --> and syslog restart messages on Linux, which
look like:

    Sep 10 04:02:07 example syslogd 1.4.1: restart.
    Apr 1 23:55:01 example syslogd 1.4.1#10: restart.
    Apr 1 23:55:01 example syslogd 1.4.1#10: restart (remote reception).
    Apr 1 23:55:09 example exiting on signal 15

are always ignored.

Messages of the form:

    Apr 28 07:09:40 10.1.1.1 Message forwarded from example.org: \
        program[36398]: some log message

(line split only for readability in this example) will be parsed exactly as
if they had said:

    Apr 28 07:09:40 example.org program[36398]: some log message

This format is used by OpenBSD for forwarded syslog messages.

Please note that this is not intended to be a security tool or a real-time
monitoring tool, but rather a tool to make sure that system administrators
are aware of unusual log messages that might indicate server problems or
failing hardware.  An intrusion detection system would work differently and
would be more paranoid, and a real-time monitoring tool wouldn't run in
batch mode.  There are other tools available to do that type of monitoring.

=head1 OPTIONS

=over 4

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script to
C<perldoc -t>).

=item B<-n>, B<--no-mail>

Rather than sending the results via e-mail, instead print out the
non-boring lines that would have been sent via e-mail to standard output.
Useful for testing filter rules.

=item B<-o>, B<--hostname>

Display the hostname field (from the input syslog) in the output.

=item B<-v>, B<--version>

Print the version of B<filter-syslog> and exit.

=back

=head1 CONFIGURATION FILE

There are three types of valid lines in the configuration file: variable
settings, filter patterns, and includes of other configuration files.
A variable setting looks like:

    variable = value

where I<value> can contain whitespace (but can't begin with whitespace).  A
filter pattern looks like:

    program: /regex/

where I<program> is the name of a particular program (the filter line will
only apply to log entries from that program) and I<regex> is a regular
expression matching lines that are "boring" and shouldn't be reported.  Any
trailing whitespace in the syslog line will be removed before matching it
against the regex.  If I<program> is C<*>, I<regex> will be applied to all
lines, regardless of what program they're from.

Finally, a line like:

    include /path/to/file

includes another configuration file at F</path/to/file>.  The path can be a
directory instead of a file, in which case every file in that directory
whose name does not contain a period is included (in no defined order).

The following variables are recognized:

=over 4

=item alert

The address to which to mail the filtering results.  No mail will be sent
if all of the input lines are filtered out by the regexes provided.  This
variable must be set and may not contain any backslashes or single quotes.

=item sender

The address from which to mail the filtering results (used for the envelope
sender and the From: header).  If not set, no address will be given to
sendmail, which will result in the mail system picking some default value
based on the user B<filter-syslog> is running as.  The value of this
variable may not contain any backslashes or single quotes.

=item subject

The value to use for the Subject: header of the filtering results.  If you
include C<$h> in the value, it will be replaced with the hostname.  This
variable must be set.

=back

If there are any input lines that don't match one of the filter rules, they
will be mailed to the value of I<alert> with a subject given by I<subject>.

=head1 EXAMPLES

Filter /var/log/syslog using /etc/leland/syslog.filter as a configuration
file:
    filter-syslog syslog.filter < /var/log/syslog

Here's a sample configuration file that filters out normal Kerberos
messages and sends the result to root@example.com with a Subject: header of
"example syslog filter results":

    alert   = root@example.com
    subject = example syslog filter results
    kftgtd:  /^connect from /
    klogind: /^connect from /
    kshd:    /^Executing .* for principal /
    kshd:    /^Shell process completed\.$/
    kshd:    /^connect from /

Instead of the three separate lines to filter out TCP wrappers messages,
one could instead use the line:

    *: /^connect from /

to filter out all syslog lines that begin with "connect from", but this
runs a larger risk of filtering out messages that would be of interest.

=head1 FILES

=over 4

=item F</etc>

=item F</etc/leland>

If the configuration file given on the command line isn't an absolute path,
it is looked for first in F</etc> and then in F</etc/leland>.  This default
can be changed by editing the beginning of this program.

=item F</etc/filter-syslog.conf>

=item F</etc/leland/filter-syslog.conf>

=item F<./filter-syslog.conf>

The default configuration file, if none is given.  The paths will be
searched in the above order.

=back

=head1 BUGS

The rule that ignores C<-- MARK --> lines, which are automatically
generated by (at least) Solaris syslogd at periodic intervals if requested,
could be exploited to hide messages from B<filter-syslog> that an
administrator may want to see.  Please again note that this is not a
security tool.  However, a better regex should be developed and used
instead, regardless.

There is no protection against inclusion loops (a configuration file that
includes another file which then includes the first file).

=head1 NOTES

As of version 1.20, B<filter-syslog> removes trailing whitespace from
syslog lines before seeing if the lines match the provided regexes.
Earlier versions did not do this.  You may need to change your regexes when
upgrading from 1.19 to 1.20.

=head1 SEE ALSO

newsyslog(8)

The current version of this program is available from its web page.

=head1 AUTHORS

Russ Allbery.  Patch for B<--hostname> from Steve Benson.
=head1 COPYRIGHT AND LICENSE Copyright 2002, 2003, 2004, 2006, 2007 Board of Trustees, Leland Stanford Jr. University. This program is free software; you may redistribute it and/or modify it under the same terms as Perl itself. =cut