#!/usr/bin/perl -w
#
# Reports groups of duplicate files among the given file names.
#
# Files are considered duplicates when `cksum` reports the same CRC and the
# same byte count for them.  Depending on the mode flag, the script prints the
# groups themselves, shell commands that act on them, or a de-duplicated list
# of names.  File names come from the command line, or from a list file (or
# stdin) when -T is given.

use strict;
use warnings;

# Prints the usage summary to standard output and exits.
sub help {
    print "$0 [-link] files...\n\n",
        "Outputs groups of duplicate files.\n",
        "-link outputs bash commands to update all duplicates from the first\n",
        "one.\n",
        "-unique outputs a list of non-duplicates and first member of each set of\n",
        "duplicates\n",
        "-T reads list from a file, -T - from stdin\n",
        "-dedupe outputs bash commands to remove duplicates.\n\n";
    exit;
}

# Returns $name in a form that is safe to paste into a bash command line.
# Names made only of unremarkable characters are returned unchanged, so the
# output for simple file names stays exactly as before; anything else is
# wrapped in single quotes.
sub shell_quote {
    my ($name) = @_;
    return $name if $name =~ m{\A[A-Za-z0-9_./+:\@%,-]+\z};
    (my $quoted = $name) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Runs `cksum` on $file and returns a "CRC SIZE" key identifying its content,
# or nothing when the checksum could not be obtained.  The list form of the
# pipe open bypasses the shell, so spaces and metacharacters in file names
# are harmless; "--" keeps names starting with "-" from being read as options.
sub checksum_key {
    my ($file) = @_;
    open(my $pipe, '-|', 'cksum', '--', $file) or return;
    my $line = <$pipe>;
    # close() on a pipe reports a non-zero exit status of the child.
    close($pipe) or return;
    return unless defined $line;
    my ($crc, $size) = split ' ', $line;
    return unless defined $size;
    return "$crc $size";
}

help() unless @ARGV;

# Mode options are mutually exclusive; only the first argument is examined.
my @options = qw(link unique dedupe);
my $mode    = "default";
for my $option (@options) {
    if ($ARGV[0] eq "-$option") {
        $mode = $option;
        shift @ARGV;
        last;
    }
}

# Collect the file names, either from a list (-T FILE, -T - for stdin) or
# directly from the remaining arguments.
my @filenames;
if (@ARGV && $ARGV[0] eq "-T") {
    my $list = $ARGV[1];
    die("-T must be followed by a valid filename\n") unless defined $list;

    my $in;
    if ($list eq "-") {
        $in = \*STDIN;
    }
    else {
        open($in, "<", $list)
            or die("-T must be followed by a valid filename: $!\n");
    }
    @filenames = <$in>;
    close($in) unless $list eq "-";

    chomp(@filenames);
    # A blank line is not a file name; skip it rather than checksum "".
    @filenames = grep { length } @filenames;
}
else {
    @filenames = @ARGV;
}

# Group the file names by checksum key, remembering the order in which each
# key was first seen so that the output is deterministic.
my %files_for;
my @key_order;
for my $file (@filenames) {
    my $key = checksum_key($file);
    unless (defined $key) {
        warn "$0: cannot checksum '$file', skipping\n";
        next;
    }
    push @key_order, $key unless exists $files_for{$key};
    push @{ $files_for{$key} }, $file;
}

# Split the groups into real duplicate sets and files that stand alone.
my @unique;
my @dupe_groups;
for my $key (@key_order) {
    my @files = @{ $files_for{$key} };
    if (@files > 1) {
        push @dupe_groups, \@files;
    }
    else {
        push @unique, @files;
    }
}

for my $group (@dupe_groups) {
    my @files = @$group;
    if ($mode eq "link") {
        # Overwrite every duplicate with a copy of the first member.
        my $src = shift @files;
        for my $dup (@files) {
            print "cp ", shell_quote($src), " ", shell_quote($dup), "\n";
        }
        print "\n";
    }
    elsif ($mode eq "unique") {
        # Just print the first one for each
        my $src = shift @files;
        print "$src\n";
    }
    elsif ($mode eq "dedupe") {
        # Keep the first member, remove the rest.
        shift @files;
        for my $dup (@files) {
            print "rm -f ", shell_quote($dup), "\n";
        }
    }
    else {
        print join("\n", @files);
        print "\n\n";
    }
}

# Now for the ones that had no duplicates at all
if ($mode eq "unique") {
    print join("\n", @unique);
    print "\n";
}