Register now and start sharing your code snippets.
-->

Perl script that can be used to calculate min, max, mean, mode, median and standard deviation for a set of log records

Perl posted 3 months ago by christian

The best thing about this script is that it’s easy to customize; right now it’s set up for semicolon-delimited input (timestamp;page;runtime).

   use strict;
   use warnings;

   # analyze.pl - per-page runtime statistics for a delimited log.
   #
   # Input (STDIN or files named on the command line): one record per line,
   #   timestamp;page;runtime
   # Output: report.csv in the current directory, holding the first and last
   # timestamp seen followed by one row per page with
   #   page;samples;min;max;mean;mode;median;stddev
   # Numbers in the report use a decimal comma instead of a decimal point.

   # Import stddev, mean, median and the other statistical functions.
   # stats.pl is a copy of http://search.cpan.org/~brianl/Statistics-Lite-3.2/Lite.pm
   # Since Perl 5.26 the current directory is no longer part of @INC, so a bare
   # do('stats.pl') would silently fail to find a file in the working
   # directory; use an explicit ./ path when the file is there and fall back to
   # the @INC search otherwise.
   my $stats_file = -e 'stats.pl' ? './stats.pl' : 'stats.pl';
   my $stats_loaded = do $stats_file;
   die "Could not parse $stats_file: $@" if $@;
   die "Could not load $stats_file: $!" unless defined $stats_loaded;

   my %page_runtimes;
   my $delimitor      = ';';
   my @report_columns = qw(page samples min max mean mode median stddev);
   my ($first_timestamp, $last_timestamp);

   # ==========================================
   # Parse log file
   # ==========================================
   # Read line by line so large logs are not held in memory all at once.
   while (my $line = <>) {
       # Remove the line ending (LF or CRLF), otherwise the report will be
       # corrupted.
       $line =~ s/\r?\n\z//;

       # Blank lines carry no sample.
       next if $line =~ /^\s*$/;

       # \Q...\E keeps the delimiter literal even if it is changed to a regex
       # metacharacter such as '|' or '.'.
       my ($timestamp, $page_name, $page_runtime)
           = split(/\Q$delimitor\E/, $line);

       # A record needs all three fields; a short line would otherwise be
       # filed under an undefined page name with an undefined runtime.
       unless (defined $page_name && defined $page_runtime && length $page_runtime) {
           warn "Skipping malformed input line $.: $line\n";
           next;
       }

       $first_timestamp = $timestamp unless defined $first_timestamp;

       # Print what we find. exists() is the right test for "first time we
       # see this page"; defined() on an array is a fatal error on modern Perl.
       unless (exists $page_runtimes{$page_name}) {
           print "Found page '$page_name'\n";
       }

       # Collect every runtime under its page name.
       push(@{ $page_runtimes{$page_name} }, $page_runtime);

       $last_timestamp = $timestamp;
   }

   # ==========================================
   # Calculate and print page statistics
   # ==========================================
   my $report_file = 'report.csv';
   open(my $report, '>', $report_file)
       or die("Could not open $report_file: $!");

   # With no usable input there are no timestamps; print empty values rather
   # than triggering "uninitialized value" warnings.
   my $first_sample = defined $first_timestamp ? $first_timestamp : '';
   my $last_sample  = defined $last_timestamp  ? $last_timestamp  : '';

   print {$report} "First sample\n" . $first_sample
                 . "\nLast sample\n" . $last_sample . "\n\n";
   print {$report} join($delimitor, @report_columns), "\n";

   # Sorted so the report has the same row order on every run.
   for my $page_name (sort keys %page_runtimes) {
       my @runtimes = @{ $page_runtimes{$page_name} };

       # Each statistic is taken in scalar context so that a function
       # returning an empty list cannot shift the report columns.
       my $samples = scalar @runtimes;
       my $min     = min(@runtimes);
       my $max     = max(@runtimes);
       my $mean    = mean(@runtimes);
       my $mode    = mode(@runtimes);
       my $median  = median(@runtimes);
       my $stddev  = stddev(@runtimes);

       my @stats = map { defined $_ ? $_ : '' }
                   ($samples, $min, $max, $mean, $mode, $median, $stddev);

       # Use a decimal comma instead of a decimal point. Only the numeric
       # columns are converted, so page names such as "index.html" stay intact.
       s/\./,/g for @stats;

       print {$report} join($delimitor, $page_name, @stats), "\n";
   }

   # Buffered write errors only surface at close, so check it.
   close($report) or die("Could not close $report_file: $!");

To use it, simply pipe some data into it like this:

   1  grep "2008-31-12" silly-data.log | perl analyze.pl

Tagged csv, perl, min, max, mean, log, parser