We will start by preparing the data. We will be creating a table sales_team_emails and inserting the data into it.
#CREATE THE TABLECREATE TABLE sales_team_emails (
sales_person_id INT AUTO_INCREMENT,
sales_person_name VARCHAR(255),
sales_person_email VARCHAR(255),
PRIMARY KEY (sales_person_id)
);
#INSERT THE DATA
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Aditi","aditi@managementbliss.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Furan T","Furan.Abey@me.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Veronica Hedge","Veronika.Loud@me.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Atharv","Atharv@managementbliss.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Erick","Erick@mine.com","45 Parking West London");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Rasmus","Rasmus.Thomas@mine.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Aditi Sharma","aditi@managementbliss.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Furan T","Furan.Abey@me.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Veronica Longman","Veronika.Loud@me.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Simon Rappid","Simon@bond.com");
INSERT INTO sales_team_emails (sales_person_name,sales_person_email)
VALUES("Simon Rappid","Simon@bond.com");
SELECT * FROM sales_team_emails ORDER BY sales_person_email;
Delete the duplicate rows but keep latest : using GROUP BY and MAX
One way to delete the duplicate rows but retaining the latest ones is by using MAX() function and GROUP BY clause. Observe the below query and output.
DELETE FROM sales_team_emails
WHERE sales_person_id NOT IN (
SELECT * FROM (
SELECT MAX(sales_person_id) FROM sales_team_emails
GROUP BY sales_person_email
) AS s_alias
);
Delete the duplicate rows but keep latest : using JOINS
DELETE s1 FROM sales_team_emails s1,
sales_team_emails s2
WHERE
s1.sales_person_id < s2.sales_person_id
AND s1.sales_person_email = s2.sales_person_email;
Delete the duplicate row but keep oldest : using JOINS
DELETE s1 FROM sales_team_emails s1,
sales_team_emails s2
WHERE
s1.sales_person_id > s2.sales_person_id
AND s1.sales_person_email = s2.sales_person_email;
Delete the duplicate row but keep oldest : using ROW_NUMBER()
DELETE FROM sales_team_emails
WHERE
sales_person_id IN (
SELECT
sales_person_id
FROM (
SELECT
sales_person_id,
ROW_NUMBER() OVER (
PARTITION BY sales_person_email
ORDER BY sales_person_email) AS row_num
FROM
sales_team_emails
) s_alias
WHERE row_num > 1
);
0 Comments