runWebalizer is used to combine logfiles from multiple Apache servers and submit them to the Webalizer logfile analysis program. This is great if you're running load-balanced Apache servers.
The trick to running runWebalizer is to get all your log files in the same directory. This can be done via NFS or ssh.
#!/usr/bin/perl -w
#
# runWebalizer: Webalizer redundant server script.
# Written by: Joel Griffiths joelg at the domain gadgetwiz dot com
#
# Concatenates and rotates log files on redundant virtual web servers
# in unison with webalizer updates.
#
# Copyright (C) 2002 Aver Drivetronics
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# See COPYING.TXT for details.
#
# Original source code can be found at
# http://www.averdrivetronics.com/webalizer
#
# This program is meant to be run by a cronjob.
# Web server log files should be NFS mounted to the directory
# specified by $WEBLOGS.
#
# Example VirtualHost directive in httpd.conf
#<VirtualHost *>
# ServerAdmin webmasterataverdrivetronics.com
# DocumentRoot /home/drivetronics/public_html
# ServerName www.averdrivetronics.com
# ServerAlias averdrivetronics.com www.averdrivetronics.com
# LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \
# \"%{User-Agent}i\"" combined
# CustomLog logs/custom_averdrivetronics.com.log combined
#</VirtualHost>
#
# Log files for each virtual host should be stored as
# custom_DOMAINNAME.log. The DOMAINNAME is extracted
# from the logfile name and used to create a separate
# sub-directory for each domain.
#
# You can configure a different logfile pattern matching
# mechanism by changing $LOG_HEADER and $LOG_FOOTER below.
# By default:
# $LOG_HEADER="custom_";
# $LOG_FOOTER="\.log";
#
use strict;
use warnings;
use Fcntl ':mode';
use Data::Dumper;
#############################################################
# USER CONFIG VARIABLES
#############################################################
# Send Debugging Information - Set to 1 to turn on messages
my $DEBUG = 0;
# Enable Logfile Rotation - when false, only the statistics are
# generated and the log files and web servers are left alone.
my $ROTATE_LOGS = 1;
# The number of logfiles to keep
my $MAXLOGS = 4;
# The names of your webserver NFS mounted directories
# ie. mount www1.averdrivetronics.com:/var/log/httpd /weblogs/www1
#
# Single Web Server
#my @SERVERS = ('');
#
# Redundant servers (Add as many servers as required)
my @SERVERS = ( 'www1', 'www2');
# Logfile configuration information
# Probably would have been better with a regular expression.
# This applies to how the log files are stored. I store my
# logfiles as custom_DOMAINNAME.log. DOMAINNAME is extracted
# and is used to create a directory for each virtual domain.
#
# Both values are interpolated into regular expressions, so they
# are regex fragments. $LOG_FOOTER is single-quoted on purpose:
# inside double quotes "\.log" collapses to ".log", and the dot
# would then match any character instead of a literal '.'.
my $LOG_HEADER = "custom_";
my $LOG_FOOTER = '\.log';
# Rotate the logfile after it reaches a certain size.
# The value is in kilobytes (the size in bytes is divided by 1024
# before it is compared against this limit).
my $ROTATE_SIZE = 1024;
# Logfile locations
# Single Web Server
#
# my $WEBLOG_DIR = "/var/log/httpd"
#
# NFS mounted directory for logfiles (redundant web servers)
my $WEBLOG_DIR = "/weblogs";
# Where to store the stats after rotating the logfiles
my $STATS_DIR = "$WEBLOG_DIR/stats";
# Where is the webalizer script
my $WEBALIZER = "/usr/bin/webalizer";
# I need the cat command or the LogSorter command to concatenate the
# logfiles. The LogSorter, available at http://ostermiller.org/webalizer/
# works best for me, but I default to cat to make things easier.
#my $CAT = "/usr/java/j2sdk1.4.0/bin/java -classpath " .
# "/usr/java/apps LogSorter";
my $CAT = "/bin/cat";
# Better make sure that the ssh command can get to each web server after
# rotating the logs. Got to restart the servers. Don't worry about this
# if you have log rotation turned off.
#
# Remember to add this system to the /root/.ssh/authorized_keys file
# on the remote systems if you want to rotate remote logs
my $SSH = "/usr/bin/ssh";
# Command required to restart the httpd service.
#my $RESTART = "/sbin/service httpd restart";
my $RESTART = "kill -HUP `cat /var/run/httpd.pid `";
#############################################################
# EDIT BELOW HERE AT YOUR OWN RISK
#############################################################
# Highest archived log number known so far. Starts at $MAXLOGS,
# is raised by getDomains when it finds higher numbered archives,
# and is used by rotateLogs to clean up out-of-range archives.
my $maxLogRange = $MAXLOGS;
# Set by rotateLogs when a log file was rotated; restartServers
# only restarts the web servers when this flag is set.
my $restartFlag = 0;
# dprint
#
# Debug printer: forwards its arguments to print, but only when
# $DEBUG is non-zero. With debugging off it returns immediately
# without producing any output.
sub dprint {
    return if $DEBUG == 0;
    print @_;
}
# getDomains
#
# Parse domains from the log directories of several
# NFS mounted web servers. Every directory $WEBLOG_DIR/<server>
# (one per entry in @SERVERS) is scanned for log files named
# <LOG_HEADER>domain.tld<LOG_FOOTER>, e.g. custom_domain.com.log.
#
# Side effects:
#  - archived logs (<logfile>.N) raise $maxLogRange to the highest
#    N seen, and archives with N >= $MAXLOGS are deleted. This is
#    done here so the directory only has to be read once.
#  - a server directory that cannot be opened is reported with
#    warn and skipped.
#
# The return value is a hash of arrays (returned as a flat
# key/value list), keyed by domain name:
#
# 'avercomputer.com' => [
#     '/weblogs/www1/custom_avercomputer.com.log',
#     '/weblogs/www2/custom_avercomputer.com.log'
# ]
#
sub getDomains {
    my %domains = ();

    foreach my $server (@SERVERS) {
        my $serverDir = "$WEBLOG_DIR/$server";
        dprint "WORKING ON SERVER $server\n";

        my $dirHandle;
        unless (opendir($dirHandle, $serverDir)) {
            warn "Cannot open log directory $serverDir: $!\n";
            next;
        }

        # defined() keeps the loop alive for a file literally named "0".
        while (defined(my $filename = readdir($dirHandle))) {
            if ($filename =~ m/$LOG_HEADER.*$LOG_FOOTER\.(\d+)\z/) {
                # Archived (rotated) log file: custom_domain.com.log.N
                my $lognumber = $1;
                $maxLogRange = $lognumber if $lognumber > $maxLogRange;
                if ($lognumber >= $MAXLOGS) {
                    unlink("$serverDir/$filename")
                        or warn "Cannot remove $serverDir/$filename: $!\n";
                }
            }
            elsif ($filename =~ m/$LOG_HEADER(.*\..*)${LOG_FOOTER}\z/) {
                # Current log file. The name must end with the footer so
                # that leftovers such as custom_domain.com.log.bak are not
                # fed to webalizer as live logs.
                my $domain = $1;
                dprint "Logfile $filename found on $server for the $domain domain\n";
                push(@{ $domains{$domain} }, "$serverDir/$filename");
            }
        }
        closedir($dirHandle);
    }

    dprint Dumper(\%domains) if $DEBUG;
    return %domains;
}
# checkdir
#
# Takes an array of domains and checks the directory
# structure for the output directory ($STATS_DIR/<domain>).
# If the directory does not yet exist for the domain, it is
# created with mode 0755.
#
# Dies when $STATS_DIR is not writable, when a domain directory
# cannot be created, when the path exists but is not a directory,
# or when the directory is not readable, writable and searchable
# by this program. Nothing is checked when the list is empty.
#
sub checkdir {
    my @domains = @_;
    return unless @domains;

    # We must have write access to the stats directory
    # so that we can add new domains automagically.
    unless (-w $STATS_DIR) {
        die "Cannot write to stats directory: $STATS_DIR";
    }

    foreach my $domain (@domains) {
        my $outputDir = $STATS_DIR . "/" . $domain;

        # Create the directory if it doesn't exist
        if (! -e $outputDir) {
            dprint "Making directory $outputDir\n";
            mkdir($outputDir, 0755)
                or die "Cannot create directory $outputDir: $!";
        }
        else {
            dprint "Directory $outputDir already exists\n";
        }

        # Now check any existing directories (as a side-effect,
        # the one just created is checked too) to see if they
        # are usable by this program.
        #
        # TODO: check the files inside the directories as well.
        if (! -d $outputDir) {
            die "$outputDir is not a directory";
        }
        if (! -r $outputDir || ! -w $outputDir || ! -x $outputDir) {
            die "$outputDir is not readable, writable and searchable " .
                "by this program";
        }
    }
    return;
}
# runWebalizer
#
# Run the webalizer utility
#
# Takes a hash of arrays where the key is the domain name
# and the array at that key contains the logfile locations.
# For every domain the log files of all servers are concatenated
# with $CAT into $STATS_DIR/<domain>/combinedLogs.log and
# $WEBALIZER is then run on that combined file:
#
# cat www1/custom_averdrivetronics.log www2/custom_averdrivetronics.log \
#     > /weblogs/stats/averdrivetronics/combinedLogs.log
# webalizer ... -D /weblogs/stats/averdrivetronics/dns.cache \
#     -o /weblogs/stats/averdrivetronics \
#     /weblogs/stats/averdrivetronics/combinedLogs.log
#
# A failing concatenation is reported with warn; webalizer is
# still run on whatever ended up in the combined file. The exit
# status of webalizer is only shown in the debug output.
#
sub runWebalizer {
    my %domains = @_; # Take one or more domains

    dprint Dumper(\%domains) if $DEBUG;

    foreach my $domain (keys %domains) {
        dprint "Working on domain for $domain\n";
        my $outputDir   = $STATS_DIR . "/" . $domain;
        my $combinedLog = "$outputDir/combinedLogs.log";

        # Cat logfiles from all servers into a temporary file.
        # $CAT may be a multi-word command (e.g. the java LogSorter),
        # so it has to go through the shell.
        my $command = join(' ', $CAT, @{ $domains{$domain} }) .
            " > $combinedLog";
        dprint "$command\n";
        my $output = qx{$command};
        if ($? != 0) {
            warn "Could not combine logfiles for $domain (exit status " .
                ($? >> 8) . "): $command\n";
        }

        # Run the webalizer on the combined log file
        $command = "$WEBALIZER -n \"$domain\" -r \"$domain\" " .
            "-s \"*$domain\" -t \"$domain\" -N 5 " .
            "-D $outputDir/dns.cache -o $outputDir $combinedLog";
        $output = qx{$command};
        my $status = $? >> 8;
        $output = '' unless defined $output;

        dprint "\n-----Output for Command:\n\t$command\n";
        dprint "$output\n";
        dprint "Exit status: $status\n";
        dprint "\n--------------------------------------\n";
    }
    return;
}
# checkWE
#
# Unimplemented placeholder: accepts a filename and performs no
# check at all. Nothing in this script calls it.
#
# The argument is unpacked in list context; a plain scalar
# assignment from @_ would store the number of arguments instead
# of the filename.
sub checkWE {
    my ($filename) = @_;
    return;
}
# deleteFile
#
# Placeholder that only announces (in debug mode) the file it was
# asked to delete; it does not remove anything.
#
# The filename is unpacked in list context so that the message
# shows the name and not the number of arguments.
sub deleteFile {
    my ($filename) = @_;
    dprint "Deleting file $filename";
    return;
}
# rotateLogs
#
# Rotate every current log file that has grown beyond $ROTATE_SIZE
# kilobytes.
#
# Takes a hash of arrays where the key is the domain name and the
# array at that key contains the logfile locations.
#
# For each file that is large enough:
#  1. archives numbered $MAXLOGS .. $maxLogRange are removed
#     ($maxLogRange was raised by getDomains while it read the
#     log directories, so no extra directory scan is needed here),
#  2. <logfile>.N is moved to <logfile>.N+1, highest N first,
#  3. the current log is renamed to <logfile>.1 and $restartFlag
#     is set so that restartServers knows a restart is required.
#
# The files are renamed rather than copied and deleted: a rename
# never leaves the log both archived and removed, and a failed
# rename leaves the live log untouched. Failures are reported
# with warn.
#
sub rotateLogs {
    my %domains = @_;

    foreach my $domain (keys %domains) {
        dprint "--- Rotating logfiles for $domain ---\n";

        foreach my $logfile (@{ $domains{$domain} }) {
            # Rotate the file unless it is still small
            my $bytes = (stat($logfile))[7];
            unless (defined $bytes) {
                warn "Cannot stat $logfile: $!\n";
                next;
            }
            my $kilobytes = $bytes / 1024;
            dprint " Logfile size for $logfile: $kilobytes\n";
            next if $kilobytes <= $ROTATE_SIZE;

            # Remove any out of range log files
            foreach my $lognumber ($MAXLOGS .. $maxLogRange) {
                my $stale = "$logfile.$lognumber";
                next unless -e $stale;
                dprint "unlink($stale)\n";
                unlink($stale) or warn "Cannot remove $stale: $!\n";
            }

            # Now go through each log file and move it to
            # the next higher log file (eg. .1 becomes .2),
            # counting backwards so nothing is overwritten early.
            foreach my $lognumber (reverse 1 .. ($MAXLOGS - 1)) {
                my $from = "$logfile.$lognumber";
                next unless -e $from;
                my $to = "$logfile." . ($lognumber + 1);
                dprint "rename($from, $to)\n";
                rename($from, $to)
                    or warn "Cannot rename $from to $to: $!\n";
            }

            # Archive the current log file
            dprint "rename($logfile, $logfile.1)\n";
            if (rename($logfile, "$logfile.1")) {
                # Set flag to restart httpd since a logfile was rotated
                $restartFlag = 1;
            }
            else {
                warn "Cannot rename $logfile to $logfile.1: $!\n";
            }
        }
        dprint "-------------------------------------\n";
    }
    return;
}
# Restart the web servers. Assuming that ssh
# works without a password from this system to the
# target system.
#
# Does nothing unless $restartFlag was set (i.e. at least one log
# file was rotated). Otherwise $RESTART is executed as root on
# every server in @SERVERS through $SSH.
#
# The command is started without an intermediate local shell, so
# shell syntax inside $RESTART (such as the backticks around
# "cat /var/run/httpd.pid") is evaluated on the remote server and
# not on this machine. An empty server name (the single web
# server setup) runs $RESTART on the local system instead.
# Failures are reported with warn.
#
sub restartServers {
    if ($restartFlag == 0) {
        dprint "No logfiles were rotated...\n";
        dprint "Don't need to restart servers\n";
        return;
    }

    foreach my $server (@SERVERS) {
        # $SSH may carry options, so split it into separate words.
        my @command = $server eq ''
            ? ('/bin/sh', '-c', $RESTART)
            : (split(' ', $SSH), "root\@$server", $RESTART);
        my $target = $server eq '' ? 'local system' : $server;
        dprint "@command\n";

        my $output = '';
        if (open(my $pipe, '-|', @command)) {
            local $/; # read everything the command prints at once
            my $captured = <$pipe>;
            $output = $captured if defined $captured;
            close($pipe)
                or warn "Restart command failed on $target " .
                        "(exit status " . ($? >> 8) . ")\n";
        }
        else {
            warn "Cannot run restart command for $target: $!\n";
        }
        dprint "$output\n";
    }
    return;
}
# Main sequence.
#
# 1. Collect the log files of every domain from all servers.
my %logfilesFor = getDomains();

# 2. Make sure each domain has a usable webalizer output directory.
checkdir(keys %logfilesFor);

# 3. Generate the statistics for each domain.
runWebalizer(%logfilesFor);

# 4. Optionally rotate the logs, then restart the web servers
#    (restartServers decides itself whether a restart is needed).
if ($ROTATE_LOGS) {
    rotateLogs(%logfilesFor);
    restartServers();
}