Friday, November 25, 2011

How to Remove Duplicate Record

CREATE TABLE Test
(
    PK        INT        NOT NULL,
    ID        INT        NOT NULL,
    NAME    VARCHAR(10) NOT NULL
)

INSERT INTO Test VALUES
(1,10,'A'),
(2,20,'B'),
(3,30,'C'),
(4,10,'A'),
(5,30,'C'),
(6,10,'A')


--// Query to get all record set
SELECT * FROM Test

--// To get duplicate Recordset with count of each duplicate record.
SELECT ID,NAME,COUNT(ID) Cnt
FROM Test
GROUP BY ID,NAME HAVING COUNT(ID) > 1


 --Method 1: Using RowCount

--// Deleting duplicate record without one record for each duplicate.
DECLARE @Cnt INT,@ID INT

DECLARE Cur CURSOR FOR
    SELECT ID,COUNT(ID) Cnt FROM Test
    GROUP BY ID,NAME HAVING COUNT(ID) > 1
OPEN Cur
FETCH NEXT FROM Cur INTO @ID,@Cnt          
WHILE (@@FETCH_STATUS = 0)
BEGIN
    SET @Cnt = (@Cnt - 1)
    SET ROWCOUNT @Cnt
  
    DELETE FROM Test WHERE ID = @ID
  
    SET ROWCOUNT 0
  
FETCH NEXT FROM Cur INTO @ID,@Cnt
END
CLOSE Cur
DEALLOCATE Cur



--// Method 2 : Common Table Expression
--// Deleting duplicate record without one record for each duplicate.

;WITH cte
AS
(
    SELECT ID,DENSE_RANK() OVER (PARTITION BY ID,NAME ORDER BY PK) Ranks FROM Test

)

DELETE FROM cte WHERE Ranks > 1

If not PK field in table , we can write the code with NewID()

;WITH cte
AS
(
    SELECT ID,DENSE_RANK() OVER (PARTITION BY ID,NAME ORDER BY NewID()) Ranks  FROM Test

)

DELETE FROM cte WHERE Ranks > 1

--// Method 3 : Using Correlated Subquery (Removing All Duplicates)
DELETE t FROM Test t WHERE ID IN (SELECT ID FROM Test WHERE ID = t.ID GROUP BY ID,NAME HAVING COUNT(ID) > 1)




--// Method 4 : Using Join (Removing All Duplicates)
DELETE a
FROM Test a
INNER JOIN
(
    SELECT ID,COUNT(ID) Cnt
    FROM Test
    GROUP BY ID,NAME HAVING COUNT(ID) > 1
) t
ON a.ID = t.ID

No comments:

Post a Comment