#! /usr/local/bin/oraperl 
'di';
'ig00';
###############################################################################
# @(#) find_dups :  find duplicate keys in a table
# @(#) SunOS deep sun4m (jstander)  
# @(#) loc: /home/tuna/bin
# @(#) $Revision 1.0 $ (jstander 03.08.93): new 
###############################################################################
# name
#       
#       
  $Synopsis = <<X;
	find_dups [ -ic ] [ user/password ] table_name key [ key .. ]
X
# description
#       find_dups finds duplicate records in table table_name for the
#       single or composite keys given.  Keys are column names in table_name 
#       
  $Options = <<X;
	-i		Create temporary index on key columns if necessary
	-c	 	Count records, don't report them.	
	user/password   Oracle user/password (not required for OPS$ login)
	table_name	name of table to search 
	key..		one or more column names in table
X
# author
#       Jeff.Stander@ml.csiro.au CSIRO Division Of Fisheries, Hobart,
#       Tasmania 7001, Australia
###############################################################################

require "getopts.pl";
require "nsu.pl";		# delete this line if not at CSIRO marine labs
require "sqlx.pl";

#------------------------------------------------------------------------------
# Process command line arguments
#------------------------------------------------------------------------------
&Getopts('zhic');

if ( $opt_z || $opt_h || $#ARGV<1 ) {                
        print STDERR "\nusage: $Synopsis";
	print "$Options" if $opt_h; 
        exit;
}

if ( $ARGV[0] =~ /\// ) {
	($user,$passwd) = split('/',$ARGV[0]);
	shift;
}

$lda=&ora_login("",$user,$passwd) || die "$Name: $ora_errstr\n";

($table_name=$ARGV[0]) =~ tr/a-z/A-Z/;
shift;

&ok_tab($table_name) || die "$Name: table not in database: $table_name\n";

while ( $#ARGV >= 0 ) {
        ($coln = $ARGV[0]) =~ tr/a-z/A-Z/;
        $w=&ok_col($table_name,$coln)
		|| die "$Name: column not in table $table_name: $coln\n";
	$w = $w < length($coln) ? length($coln) : $w;
	$fmt .= "%${w}s ";
	$col[$cnt++] = $coln;
	push(colnames,$coln);
	$coln =~ s/./-/g;
	push(underbar,$coln);
        shift;
}
chop $fmt;
$fmt .= "\n";

if ( $opt_i ) {
	$index  = "create index tmpndx$$ on $table_name(";
	$index .= join (',',@colnames);
	$index .= ")";
	&sqlx($index,$lda) ;
	$dropindex = "drop index tmpndx$$"; 
	undef $dropindex if $ora_errno == -1408; # column list already indexed
}

$qh="SELECT ";
if ( $opt_c ) {
	$qh .= "count(a.$col[$i]) "
}
else {
	for ($i=0; $i<$cnt; $i++ ) { 
		$qh .= "a.$col[$i]," ;
	}
}
chop $qh;
$qh .= " FROM $table_name a WHERE EXISTS (SELECT 1 FROM $table_name b WHERE ";
for ($i=0; $i<$cnt; $i++ ) { 
        $column = $col[$i];
        $qh .= "$and a.$column=b.$column ";
        $and = "AND";
}

#print "\n";
$qh .= "AND A.rowid > B.rowid)";
#print "$qh\n"; exit;

if ( $opt_c ) {
	print "" . (&sqlx($qh,$lda))[0] . " duplicate keys found\n";
}
else {
	$csr = &ora_open($lda,$qh) || die "$Name: $ora_errstr\n";

	while ( @result=&ora_fetch($csr) ) {
		printf($fmt,@colnames) if !$first; 
		printf($fmt,@underbar) if !$first; $first=1;
		printf($fmt,@result);
	}

	print "No duplicate keys found\n" if !$first;
	&ora_close($csr);
}

&sqlx($dropindex,$lda) if $dropindex;
&ora_logoff($lda);

###############################################################
# END OF PERL SCRIPT                                          #
###############################################################

    # These next few lines are legal in both Perl and nroff.

.00;                       # finish .ig
 
'di           \" finish diversion--previous line must be blank
.nr nl 0-1    \" fake up transition to first page again
.nr % 0         \" start at page 1
'; __END__ ##### From here on it's a standard manual page #####
.PU
.TH FIND_DUPS 1 "August 4, 1993" "CSIRO Fisheries"
.SH NAME
find_dups \- find duplicate keys in a table
.SH SYNOPSIS
.ll +8
.B find_dups 
[ 
.B -ic
]
.I table_name key
[
.I key ..
]
.SH DESCRIPTION
.I Find_dups
finds duplicate records in table 
.I table_name 
for a single or composite 
.I key 
given on the command line,
where the key consists of one or more column names in 
.IR table_name .

If the
.B -i
option is set
.I find_dups 
will create an index on the columns given.  It will first
check to see that an index does not already exist and, if
creates one, it will be dropped on exit.

The
.B -c
option cause
.I find_dups
to count the number of records with duplicate keys and report
the total.  Normally the key values are reported.
.SH OPTIONS
.TP
.B -i
Create an index on the key columns before searching.
.PD
.TP
.B -c
Report only the number of records found with duplicatea keys.
.PD
.SH ENVIRONMENT
No environment variables are used.
.SH AUTHOR
Jeff.Stander@ml.csiro.au 
.br
Copyright (c) 1993 CSIRO Division of Fisheries, Hobart Tasmania, AUSTRALIA