如何大量修改关联数据并删除重复数据

发布于
2017-07-31

现在库里有两张表，一张是信息表，一张是用户表。
信息表通过用户id计算的code进行关联。
也就是存在一个用户对应多个信息的情况。
大致可以这么描述：
信息表<id,内容,时间,用户code,>
用户表<id,用户code,用户信息>，
目前因为入库时数据的去重处理问题，导致了用户表大量的同一个用户id拥有了不同的用户code。(即同一个用户信息出现了多次，除了用户code和入库时间不一致以外，其他可以理解为完全一致)
我想要删除重复的用户，只保留一个最新的用户信息。
可是在对用户进行处理的时候，如何保证删除掉的那些用户code对应的信息表的code改成了保留的那个code。
量很大，求指教如何进行处理？
eg: 类似于这样的图信息

sqlserver sql语句

阅读 3.5k

3 个回答

得票最新

William

5031521

发布于
2017-08-07

✓ 已被采纳

WITH CTET AS (
SELECT *,ROW_NUMBER()OVER(PARTITION BY ID ORDER BY 入库时间) AS RN
FROM 用户表
)
UPDATE t2
SET 用户code = t3.用户code
FROM CTET AS t1

    INNER JOIN 信息表 AS t2 ON t1.用户code = t2.用户code
    INNER JOIN CTET AS t3 ON t3.ID = t1.ID
                             AND t3.RN = 1

WHERE t1.RN > 1;

Unique

81142446

发布于
2017-08-01

更新于
2017-08-01

--信息表 TableInfo
--用户表 TableUser
--用户code  UserCode
--删除表中多余的重复记录，重复记录是根据单个字段（UserCode）来判断，只留有UserCode最小的记录

delete from [TableUser]
       where 
            Id in (select Id from [TableUser] group by Id having count(Id) > 1) 
       and 
            UserCode not in (select min(UserCode) from [TableUser] group by UserCode having count(UserCode)>1)

ch21st

46716

发布于
2017-08-04

更新于
2017-08-04

建议你在用户表加个字段，将用一个id的用户归并成同一个用户code，比如：

if object_id('tempdb..#user') is not null drop table #user
if object_id('tempdb..#DAT') is not null drop table #DAT
create table #user(id VARCHAR(10),code VARCHAR(10),u_name NVARCHAR(10),CreatedDate DATETIME)
insert into #user
select 'A_111','1514',N'A','20170731' union all
select 'A_111','1314',N'A','20170730' union all
select 'A_111','1111',N'A','20170718' union all
select 'A_222','1320',N'B','20170718' union all
select 'A_333','1450',N'C','20170718' union all
select 'A_333','1350',N'C','20170719'
CREATE TABLE #DAT(id INT,DAT VARCHAR(100),UserCode VARCHAR(10),CreatedDate DATETIME)
insert into #DAT
select 1,'AAA','1514','20170730' union all
select 2,'SSS','1314','20170729' union all
select 3,'BBB','1111','20170728' union all
select 4,'QQQ','1111','20170727' union all
select 5,'WWW','1320','20170726' union all
select 6,'EEE','1320','20170725' union all
select 7,'RRR','1450','20170724' union all
select 8,'TTT','1350','20170723' 
select * from #DAT
if col_length('tempdb..#USER','AKA_Code') IS NULL
alter table #USER add AKA_Code varchar(10)
 
update u set u.AKA_Code=u.aka from (
   SELECT *,min(code)over(partition by id) as aka FROM #USER 
) as u 
select * from #user
 
update d set d.UserCode=u.AKA_Code
from #DAT as d inner join #user as u on d.UserCode=u.code
where d.UserCode!=u.AKA_Code
select * from #DAT

id	DAT	UserCode	CreatedDate
1	AAA	1514	2017-07-30 00:00:00:000
2	SSS	1314	2017-07-29 00:00:00:000
3	BBB	1111	2017-07-28 00:00:00:000
4	QQQ	1111	2017-07-27 00:00:00:000
5	WWW	1320	2017-07-26 00:00:00:000
6	EEE	1320	2017-07-25 00:00:00:000
7	RRR	1450	2017-07-24 00:00:00:000
8	TTT	1350	2017-07-23 00:00:00:000

修改前：

id	code	u_name	CreatedDate	AKA_Code
A_111	1514	A	2017-07-31 00:00:00:000	1111
A_111	1314	A	2017-07-30 00:00:00:000	1111
A_111	1111	A	2017-07-18 00:00:00:000	1111
A_222	1320	B	2017-07-18 00:00:00:000	1320
A_333	1450	C	2017-07-18 00:00:00:000	1350
A_333	1350	C	2017-07-19 00:00:00:000	1350

修改后：

id	DAT	UserCode	CreatedDate
1	AAA	1111	2017-07-30 00:00:00:000
2	SSS	1111	2017-07-29 00:00:00:000
3	BBB	1111	2017-07-28 00:00:00:000
4	QQQ	1111	2017-07-27 00:00:00:000
5	WWW	1320	2017-07-26 00:00:00:000
6	EEE	1320	2017-07-25 00:00:00:000
7	RRR	1350	2017-07-24 00:00:00:000
8	TTT	1350	2017-07-23 00:00:00:000

撰写回答

你尚未登录，登录后可以

和开发者交流问题的细节
关注并接收问题和回答的更新提醒
参与内容的编辑和改进，让解决方法与时俱进

推荐问题

如何大量修改关联数据并删除重复数据

你尚未登录，登录后可以

sql语句怎么取每种物料的第一行数据？

如何查询A表中非正常的名称列表？