#!/usr/bin/perl

# Directory to de-duplicate: the first command-line argument (undef when the
# script is started without arguments). Kept as a package global because the
# main part of the script reads it.
($dir) = @ARGV;

# Abort the script with the help text.
#
# Takes no arguments and never returns: the text is handed to die(), so it is
# written to STDERR and the script exits with a failure status. The message
# ends in a newline, which keeps Perl from appending "at FILE line N".
sub usage {
    my @help_lines = (
        'required argument: directory',
        '',
        'symlink-dupes descends into the given directory, scans each file and ',
        'replaces it with a (sym)link to another file with the same content.',
        'You should run the "symlinks" command afterwards, to make the symlinks',
        'relative and as short as possible. I recommend the "-c -s -r" options and executing multiple times.',
    );

    die join("\n", @help_lines), "\n";
}

# ---------------------------------------------------------------------------
# Main program.
#
# Walks the directory tree rooted at $dir, computes an MD5 digest for every
# regular file and replaces each file whose content has already been seen
# with a symlink to the first file (in sorted path order) that had that
# content.
#
# Output:
#   STDOUT  one "." per examined file and one
#           "linking real file <kept> to <replaced>" line per replacement
#   STDERR  a message for every file that had to be skipped
#
# Safety properties of this implementation:
#   * only regular files are considered (symlinks, directories, fifos,
#     sockets and device nodes are ignored via lstat-based file tests);
#   * no shell is involved, so arbitrary characters in file names are safe;
#   * files are only linked after a byte-for-byte comparison, so a digest
#     collision cannot destroy data;
#   * a file is replaced atomically (symlink under a temporary name, then
#     rename), so it is never missing and no neighbouring file is clobbered.
# ---------------------------------------------------------------------------
use strict;
use warnings;

use Digest::MD5   ();
use File::Compare ();
use File::Find    ();
use File::Spec    ();

our $dir;    # package global holding the directory argument (set from @ARGV above)

# Return the hexadecimal MD5 digest of the file at $path, or undef when the
# file cannot be opened or read.
sub _md5_hex_of {
    my ($path) = @_;

    open my $fh, '<', $path or return;
    binmode $fh;

    # addfile() croaks on read errors; turn that into an undef result.
    my $digest = eval { Digest::MD5->new->addfile($fh)->hexdigest };
    close $fh;

    return $digest;
}

# Atomically replace $path with a symlink that points at $target.
#
# The link is first created under a temporary name next to $path and then
# renamed over $path. Returns 1 on success. On failure a message is written
# to STDERR, $path is left untouched and 0 is returned.
sub _replace_with_symlink {
    my ($target, $path) = @_;

    my $tmp = $path . '.tmp' . $$;

    # symlink() refuses to overwrite, so an existing $tmp is never clobbered.
    if (!symlink $target, $tmp) {
        print STDERR "Problems linking $target to $path ($!), skipping\n";
        return 0;
    }

    if (!rename $tmp, $path) {
        print STDERR "Problems replacing $path with a symlink ($!), skipping\n";
        unlink $tmp;
        return 0;
    }

    return 1;
}

usage() unless defined($dir) && length($dir);

# Make the path absolute so that the symlink targets are valid regardless of
# the directory the link lives in.
$dir = File::Spec->rel2abs($dir);
die "$dir is not a directory\n" unless -d $dir;

# Collect all candidates before touching anything, so that links created
# during the run can never show up in the scan.
my @files;
File::Find::find(
    {
        no_chdir => 1,
        wanted   => sub {
            my $path = $File::Find::name;

            # -l performs an lstat; "_" reuses that result, so symlinks are
            # never followed here.
            push @files, $path if !-l $path && -f _;
        },
    },
    $dir,
);

my %first_seen_with;    # MD5 hex digest => path of the first file with that digest

FILE: for my $thefile (sort @files) {
    print ".";

    # The tree may have changed since the scan.
    next FILE unless -f $thefile && !-l $thefile;

    my $sum = _md5_hex_of($thefile);
    if (!defined $sum) {
        print STDERR "Error while trying to examine $thefile, skipping.\n";
        next FILE;
    }

    my $probe = $first_seen_with{$sum};
    if (!defined $probe) {
        $first_seen_with{$sum} = $thefile;    # file is new, feeding database
        next FILE;
    }

    # Never link a file to itself.
    next FILE if $probe eq $thefile;

    # Equal digests are only a hint; confirm the content really is identical
    # before anything is replaced (compare() returns 0 only for equal files).
    if (File::Compare::compare($probe, $thefile) != 0) {
        print STDERR "Could not confirm that $thefile and $probe are identical, skipping.\n";
        next FILE;
    }

    print "linking real file " . $probe . " to " . $thefile . "\n";
    _replace_with_symlink($probe, $thefile);
}
