;#
;# Copyright (c) 1996, Ikuo Nakagawa.
;# All rights reserved.
;#
;# $Id: lsparse.pl,v 1.4 1997/05/22 08:38:55 ikuo Exp $
;#
;# Last update:
;#	1996/10/17 by Ikuo Nakagawa
;# Description:
;#	lsparse.pl - parsing an output of /bin/ls.
;#
;# timelocal.pl should be called in the package 'main'
require "timelocal.pl";

;# the name of this package
package lsparse;

;# we use log package
require "log.pl";

;# prototypes
;# sub findpath($);
;# sub lslR_open($);
;# sub lslR_close();
;# sub lslR_getdir(;$);
;# sub lslR_parse($;$);
;# sub lsparse($);
;# sub lstime($);

;# name of month, and conversion table
@nameofmonth = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
@nametomonth{map(lc $_, @nameofmonth)} = (0..11);
$regexp_month = join('|', @nameofmonth);

;# set this to debug this package
$debug = 0;

;#
$path_gzip = &findpath("gzip");
$path_compress = &findpath("compress");

;#
sub findpath {
 my $foo = shift;
 my @path = split(':', $ENV{'PATH'});
 my $d;
 for $d (@path) {
  return "$d/$foo" if -x "$d/$foo";
 }
 for $d (qw(/usr/local/bin /usr/gnu/bin /usr/contrib/bin /usr/ucb)) {
  return "$d/$foo" if -x "$d/$foo";
 }
 $foo;
}

;#
;# If prototypes are commented out, lslR_close must be defined
;# before lslR_open.
;#
sub lslR_close {
 close(LSLR), undef $LSLR if defined($LSLR);
 1;
}
;#
;# lslR_open
;# Open lslR file, which may be .gz or .Z compressed file.
;# lslR_open must be called before lslR_getdir.
;#
sub lslR_open {
 my($lslR) = @_;

 &lslR_close;
 $LSLR = $lslR =~ /\.gz$/ ? "$path_gzip -cd $lslR|" :
	 $lslR =~ /\.Z$/ ? "$path_compress -cd $lslR|" :
	 $lslR;
 open(LSLR) || do { log::putl("NOTICE", "open: $!"); return undef };
 1;
}

;#
;# Parsing ls-lR format
;# CAUTION: This subroutine is not re-entrant!!
;# lslR_getdir() returns a dirinfo for a directory on each call.
;# Following variables are re-used:
;#  *LSLR, $topdir, $curdir;
;#
sub lslR_getdir {
 local($subst) = @_;
 local($_, $s, $info, $total);

 return undef if !defined($LSLR);

 while (<LSLR>) {

  chomp;

  if (/^$/) {			# end-of-directory
   return ($curdir, \%{$info}) if defined($curdir);
   next; # search next directory name...
  }

  if (/^total (\d+)/) {		# we may skip this
   $total = $1;
   next;
  }

  ;# normal entry
  if (/^(.)([-r][-w][-xsS][-r][-w][-xsS][-r][-w][-xtT])\s*/) {
   my($type, $mode) = ($1, $2);
   if (!defined($s = &lsparse($_))) {
     next;			# ignore error
   }
   $info->{$s->{'file'}} = $s;
   if (defined($curdir)) {
    $s->{'path'} = "$curdir/$s->{'file'}";
   }
  }

  ;# new directory
  if (/:$/) {
   $curdir = $`;
   $curdir = &{$subst}($curdir) if defined($subst) && ref($subst) eq 'CODE';
   if (!defined($topdir)) {
    $topdir = $curdir =~ m%/[^/]+$% ? $` : '.';
    log::putl("DEBUG", "setting topdir to $topdir") if $debug;
    if (defined($info)) {
     for $key (keys %{$info}) {
      $s = $info->{$key};
      $s->{'path'} = "$topdir/$s->{'file'}";
     }
     return ($topdir, \%{$info});
    }
   }
  } # end of if (/:$/) { ... }

  ;# other case?
 }

 ;#
 if (!defined($topdir)) {
  $topdir = $curdir = '.';
  log::putl("DEBUG", "setting topdir to $topdir") if $debug;
 }

 ;#
 if (defined($info)) {
  for $key (keys %{$info}) {
   $s = $info->{$key};
   $s->{'path'} = "$curdir/$s->{'file'}";
  }
  return ($curdir, \%{$info});
 }

 ;#
 return undef;
}

;#
;# lslR_parse() parses ls-lR format file. This routine do
;#  call lslR_open;
;#  call lslR_getdir for ls-lR directory hierarcy
;#  call lslR_close;
;#  and returns directory information for the top directory
;#  of ls-lR.
;#
sub lslR_parse {
 my($lslR, $subst) = @_;
 my($d, $s, $x, $y, @d);

 # Let's open lslR file
 &lslR_open($lslR) || return undef;

 # Get top directory information...
 ($d, $x) = &lslR_getdir($subst);

 # Check directory information
 if (!defined($x)) {
  &lslR_close;
  log::putl("INFO", "lslR_parse: empty directory");
  return undef;
 }

 # DEBUG...
 if ($debug > 0) {
  log::putl("DEBUG", "%5 directory = $d");
  for (sort keys %{$x}) {
   log::putl("DEBUG", "%5  $x->{$_}->{'path'}");
  }
 }

 # We support only the case of $topdir eq '.'
 if ($d ne '.') {
  &lslR_close;
  log::putl("INFO", "lslR_parse: top directory `$d' must be `.'");
  return undef;
 }

 # Scan directory hierarcy
 while (($d, $s) = &lslR_getdir($subst)) {

  # Check result
  last if !defined($s);

  # DEBUG...
  if ($debug > 0) {
   log::putl("DEBUG", "%5 directory = $d");
   for (sort keys %{$s}) {
    log::putl("DEBUG", "%5  $s->{$_}->{'path'}");
   }
  }

  # search directory information HASH
  $y = $x;
  @d = split('/', $d);
  while (@d) {
   my $f = shift(@d);
   next if $f eq '.';
   if (!(defined($y->{$f}) && $y->{$f}->{'type'} eq 'directory')) {
    &lslR_close;
    log::putl("DEBUG", "lslR_parse: $y->{'path'} don't have $f");
    log::putl("INFO", "lslR_parse: unsupported format");
    return undef;
   }
   if (@d) { # something remains, down...
    $y = \%{$y->{$f}->{'hash'}};
   } else { # set HASH
    $y->{$f}->{'hash'} = $s;
   }
  }
 }

 # close, and...
 &lslR_close;

 # return the top of directory hierarcy
 $x;
}

;# parsing a line from outputs of `ls'
sub lsparse {
 local($_) = @_;
 local($[);
 my($s, $x, $tmp);
 my($type, $mode, $nlink, $owner, $group, $size, $date, $file);

 # backup
 $x = $_;

 # check DOS dirstyle
 if (/^\d\d-\d\d-\d\d\s+\d\d:\d\d(am|pm)?/i) { # DOS
  ($date, $_) = ($&, $');
  if (s/\s+\<DIR\>\s+//) {
   $s->{'type'} = 'directory';
  } elsif (s/\s+(\d+)\s+//) {
   ($s->{'type'}, $s->{'size'}, $s->{'date'}) = ('file', $1, $date);
  } else {
   log::putl("INFO", "unknown DOS format: \"$x\""), return undef;
  }
  $s->{'file'} = $_;
  return $s;
 }

 # UNIX style - check file modes
 s/^(.)([-r][-w].[-r][-w].[-r][-w].)\s*// || do {
  log::putl("DEBUG", "invalid format: \"$x\"");
  return undef;
 };
 $type = $1;
 $tmp = $2;

 # mode to octal value
 $mode = 0;
 $mode |= 04000 if $tmp =~ /^..s/;
 $mode |= 02000 if $tmp =~ /^.....s/;
 $mode |= 01000 if $tmp =~ /^........t/;
 $mode |= 00400 if $tmp =~ /^r/;
 $mode |= 00200 if $tmp =~ /^.w/;
 $mode |= 00100 if $tmp =~ /^..[xs]/;
 $mode |= 00040 if $tmp =~ /^...r/;
 $mode |= 00020 if $tmp =~ /^....w/;
 $mode |= 00010 if $tmp =~ /^.....[xs]/;
 $mode |= 00004 if $tmp =~ /^......r/;
 $mode |= 00002 if $tmp =~ /^.......w/;
 $mode |= 00001 if $tmp =~ /^........[xs]/;
 $mode = sprintf("%04o", ($mode & 0777));

 # find date string
 /($regexp_month)\s+(\d?\d)\s+((\d\d\d\d)|(\d+:\d+))/i || do {
  log::putl("DEBUG", "date not found: \"$x\""), return undef;
 };

  # save some values
 ($_, $date, $file) = ($`, $&, $');

 #
 s/\s*(\d+)\s*$// || do {
  log::putl("DEBUG", "size not found: \"$x\""), return undef;
 };
 $size = $1;
 s/^(\d+)\s*// || do {
  log::putl("DEBUG", "nlink not found: \"$x\""), return undef;
 };
 $nlink = $1;
 ($owner, $group) = /\s+/ ? ($`, $') : ($_, '');

 # set filename to $_
 $_ = $file;

 # kill leading/trailing spaces
 s/^\s+//; s/\s+$//;

 # logging
 if ($debug > 1) {
  log::putl("DEBUG", "parsing [$x]");
  log::putl("DEBUG", " ... filename = $_");
  log::putl("DEBUG", " ... type = $type");
  log::putl("DEBUG", " ... mode = $mode");
  log::putl("DEBUG", " ... owner = $owner");
  log::putl("DEBUG", " ... group = $group");
  log::putl("DEBUG", " ... size = $size");
  log::putl("DEBUG", " ... date = $date");
  my @a = reverse((localtime(&lstime($date) - 3600 * 9))[0..5]);
  $a[0] += 1900;
  $a[1]++;
  log::putl("DEBUG", " ... time = ".
    sprintf("%04d-%02d-%02d %02d:%02d:%02d", @a));
 }

 #
 if ($type eq 'd') {
  $s->{'type'} = 'directory';
  $s->{'file'} = $_;
  if ($debug > 1) {
   log::putl("DEBUG", "parse: directory $s->{'file'}");
  }
  return $s;
 }
 if ($type eq 'l') {
  $s->{'type'} = 'symlink';
  ($s->{'file'}, $s->{'linkto'}) = /\s+->\s+/ ? ($`, $') : ($_, '');
  if ($debug > 1) {
   log::putl("DEBUG", "parse: symlink $s->{'file'} -> $s->{'linkto'}");
  }
  return $s;
 }
 if ($type eq '-') {
  $s->{'type'} = 'file';
  $s->{'file'} = $_;
  $s->{'mode'} = $mode;
  $s->{'owner'} = $owner;
  $s->{'group'} = $group;
  $s->{'size'} = $size;
  $s->{'date'} = $date;
  $s->{'time'} = &lstime($date);
  log::putl("DEBUG", "parse: file $s->{'file'}") if $debug > 2;
  return $s;
 }
 return undef;
}

;# parse date strings and convert to time
sub lstime {
 local($_) = @_;

 my($sec, $min, $hour, $day, $mon, $year) =
  /^($regexp_month)\s+(\d+)\s+((\d\d\d\d)|((\d+):(\d+)))$/oi ?
   (0, $7, $6, $2, $1, $4) : # Unix ls
  /^($regexp_month)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+(\d\d\d\d)$/oi ?
   ($5, $4, $3, $2, $1, $6) : # Unix ls -T
  /^(\d+)\s+($regexp_month)\s+((\d\d\d\d)|((\d+):(\d+)))$/oi ?
   (0, $7, $6, $1, $2, $4) : # dls and NetWare
  /(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)(AM|PM)?/oi ?
   (0, $5, ($6 eq 'PM' ? $4 + 12 : $4) , $2, $1, $3) :
  /(\d+)-(\S+)-(\d+)\s+(\d+):(\d+)/oi ?
   (0, $5, $4, $1, $2, $3) : # VMS style
  /^\w+\s+($regexp_month)\s+(\d+)\s+(\d+):(\d+):(\d+)\s+(\d+)/oi ?
   ($5, $4, $3, $2, $1, $6) : # CTAN style (and HTTP)
  /^\w+,\s+(\d+)-($regexp_month)-(\d+)\s+(\d+):(\d+):(\d+)/oi ?
   ($6, $5, $4, $1, $2, $3) : # another HTTP
  undef;

 my $month = ($mon =~ /^\d+$/ ? $& - 1 : $nametomonth{lc $mon});

 if (!defined($year) || $year !~ /\d\d\d\d/){
  my($l_month, $l_year) = (gmtime)[4, 5];
  $year = $l_year;
  $year-- if $month > $l_month;
 }
 $year -= ($year >= 2000 ? 2000 : $year >= 1970 ? 1900 : 0);

 $sec = 0 unless defined($sec) && $sec >= 0 && $sec < 60;
 $min = 0 unless defined($min) && $min >= 0 && $min < 60;
 $hour = 0 unless defined($hour) && $hour >= 0 && $hour < 24;

 return &main::timegm($sec, $min, $hour, $day, $month, $year);
}

;#
;# search a directory information for the specified directory.
;#
sub scan_init {
 my($lslR, $subst) = @_;

 ;# First log
 log::putl("INFO", "scan: initialize") if $debug;

 # Let's open lslR file
 &lslR_open($lslR) || return undef;

 undef %scan_cache;
 undef $scan_subst;
 $scan_subst = $subst if defined($subst) && ref($subst) eq 'CODE';
 $scan_run = 1;
}

;#
;#
;#
sub scan_compare {
 my($x, $y) = @_;

 $x =~ s,/,\001,g;
 $y =~ s,/,\001,g;
 $x cmp $y;
}

;#
;# search a directory entry for scanning.
;#
sub scan_search {
 my($dir) = @_;

 # check scan directory
 if (exists($scan_cache{$dir})) {
  my $x = $scan_cache{$dir};
  delete($scan_cache{$dir});
  log::putl("INFO", "scan: + $dir... found in cache.") if $debug;
  return $x;
 }

 # read next information
 while ($scan_run) {
  my($d, $x) = &lslR_getdir($scan_subst);

  last if !defined($x);
  if ($d eq $dir) {
   log::putl("INFO", "scan: + $dir... found by lslR_getdir.") if $debug;
   return $x;
  }
  if (&scan_compare($d, $dir) < 0) {
   log::putl("DEBUG", "scan: + $dir... was skipped.") if $debug;
  } else { ## $d (> $dir) may be scaned later
   log::putl("DEBUG", "scan: + $dir... was cached.") if $debug;
   $scan_cache{$d} = $x;
  }
 }

 &lslR_close;
 $scan_run = 0;
 log::putl("INFO", "scan: ? $dir... not found.") if $debug;
 return undef;
}

;#
sub scan_done {
 &lslR_close if $scan_run;
 for $d (sort keys %scan_cache) {
  log::putl("DEBUG", "scan: - $d");
  delete($scan_cache{$d});
 }
 undef %scan_cache;
 undef $scan_subst;
 undef $scan_run;
 log::putl("INFO", "scan: done") if $debug;
}

;# success on this package
1;
