Perl script that can be used to calculate min, max, mean, mode, median and standard deviation for a set of log records

Perl posted over 2 years ago by christian

The best thing about this script is that it’s easy to customize; right now it’s set up for semicolon-delimited data (one "timestamp;page;runtime" record per line).

   use strict;
   use warnings;

   # Reads delimited log records ("timestamp;page;runtime", one per line) from
   # STDIN or from the files named on the command line, groups the runtimes by
   # page, and writes per-page statistics (samples, min, max, mean, mode,
   # median, stddev) to report.csv in the current directory.

   # ==========================================
   # Load the statistical functions
   # ==========================================

   # min, max, mean, mode, median and stddev come from stats.pl, a copy of
   # http://search.cpan.org/~brianl/Statistics-Lite-3.2/Lite.pm
   #
   # Perl 5.26 and later no longer search the current directory for a bare
   # file name passed to do(), so use an explicit ./ path when the file is
   # there and fall back to the @INC search otherwise.
   my $stats_file   = -e 'stats.pl' ? './stats.pl' : 'stats.pl';
   my $stats_result = do $stats_file;
   die "Could not compile $stats_file: $@" if $@;
   my $load_error = defined $stats_result ? '' : " ($!)";

   # Fail before parsing any input if a required function is missing, instead
   # of dying on the first statistics call after the whole log has been read.
   for my $function (qw(min max mean mode median stddev)) {
     main->can($function)
       or die "Could not load $function() from $stats_file$load_error\n";
   }

   my %page_runtimes;              # page name => array ref of runtimes
   my $input_delimiter  = ';';     # field separator of the log records
   my $output_delimiter = ';';     # field separator of report.csv
   my $report_file      = 'report.csv';
   my @report_columns   = qw(page samples min max mean mode median stddev);
   my ($first_timestamp, $last_timestamp);

   # ==========================================
   # Parse log file
   # ==========================================

   #
   # Read line by line; foreach (<>) would pull the whole input into memory.
   #
   while (my $line = <>) {
     # Strip the line ending (LF or CRLF), otherwise the report will be
     # corrupted.
     $line =~ s/\r?\n\z//;

     # Blank lines carry no record.
     next if $line eq '';

     my ($timestamp, $page_name, $page_runtime)
       = split /\Q$input_delimiter\E/, $line;

     # A record needs all three fields; skip anything shorter.
     unless (defined $page_runtime) {
       warn "Skipping malformed record on input line $.\n";
       next;
     }

     $first_timestamp = $timestamp unless defined $first_timestamp;

     # Print what we find, the first time each page shows up.
     print "Found page '$page_name'\n" unless exists $page_runtimes{$page_name};

     # Collect the runtimes per page.
     push @{ $page_runtimes{$page_name} }, $page_runtime;

     $last_timestamp = $timestamp;
   }

   # ==========================================
   # Calculate and print page statistics
   # ==========================================
   open(my $report, '>', $report_file)
     or die "Could not open $report_file: $!";

   # With no usable input both timestamps are undefined; print them as empty.
   my $first_sample = defined $first_timestamp ? $first_timestamp : '';
   my $last_sample  = defined $last_timestamp  ? $last_timestamp  : '';

   print {$report} "First sample\n$first_sample\nLast sample\n$last_sample\n\n";
   print {$report} join($output_delimiter, @report_columns), "\n";

   # Sorted so that the report rows come out in a stable order.
   for my $page_name (sort keys %page_runtimes) {
     my @runtimes = @{ $page_runtimes{$page_name} };

     # Scalar assignments keep every statistics call in scalar context, so
     # each one contributes exactly one column.
     my $samples = @runtimes;
     my $min     = min(@runtimes);
     my $max     = max(@runtimes);
     my $mean    = mean(@runtimes);
     my $mode    = mode(@runtimes);
     my $median  = median(@runtimes);
     my $stddev  = stddev(@runtimes);

     # Print an empty field for any statistic stats.pl leaves undefined.
     my @statistics = map { defined $_ ? $_ : '' }
       ($samples, $min, $max, $mean, $mode, $median, $stddev);

     # Use a comma as the decimal separator. Only the numeric fields are
     # rewritten, so dots in the page name (e.g. index.html) stay intact.
     s/\./,/g for @statistics;

     print {$report} join($output_delimiter, $page_name, @statistics), "\n";
   }

   # Buffered write errors only surface when the handle is closed.
   close($report) or die "Could not close $report_file: $!";

To use it, simply pipe some data into it like this (the report is written to report.csv in the current directory):

   1  grep "2008-31-12" silly-data.log | perl analyze.pl

Tagged csv, perl, min, max, mean, log, parser