postgresql分页数据重复问题的深入理解

论坛 期权论坛 脚本     
niminba   2021-5-23 04:43   1215   0

问题背景

许多开发和测试人员都可能遇到过列表的数据翻下一页的时候显示了上一页的数据,也就是翻页会有重复的数据。

如何处理?

这个问题出现的原因是选择的排序字段存在重复值,常见的处理办法是在排序时加上唯一字段,这样在分页的过程中数据就不会重复了。关于这个问题,文档中也有解释:这并不是一个bug,而是排序时需要选择唯一字段来做排序,否则返回结果的顺序是不确定的。

排序返回数据重复的根本原因是什么呢?

经常优化sql的同学可能会发现,执行计划里面会有Sort Method这个关键字,它表示本次排序所选择的方法。abase的排序方法分为三种:

quicksort       Memory          快速排序
top-N heapsort  Memory          堆排序
external merge  Disk            归并排序(执行计划中也可能显示为external sort)

推测

分页重复的问题和执行计划选择排序算法的稳定性有关。

简单介绍下这三种排序算法的场景:

在有索引的情况下:排序可以直接走索引。在没有索引的情况下:当表的数据量较小(排序所需内存小于work_mem)时选择快速排序;当排序带有limit,且耗费的内存不超过work_mem时选择堆排序;当work_mem不够时选择归并排序。

验证推测

1.创建表,初始化数据

abase=# create table t_sort(n_int int,c_id varchar(300));
CREATE TABLE
abase=# insert into t_sort(n_int,c_id) select 100,generate_series(1,9);
INSERT 0 9
abase=# insert into t_sort(n_int,c_id) select 200,generate_series(1,9);
INSERT 0 9
abase=# insert into t_sort(n_int,c_id) select 300,generate_series(1,9);
INSERT 0 9
abase=# insert into t_sort(n_int,c_id) select 400,generate_series(1,9);
INSERT 0 9
abase=# insert into t_sort(n_int,c_id) select 500,generate_series(1,9);
INSERT 0 9
abase=# insert into t_sort(n_int,c_id) select 600,generate_series(1,9);
INSERT 0 9

三种排序

--快速排序 quicksort
abase=# explain analyze select ctid,n_int,c_id from t_sort order by n_int asc;
            QUERY PLAN            
------------------------------------------------------------
 Sort (cost=3.09..3.23 rows=54 width=12) (actual time=0.058..0.061 rows=54 loops=1)
 Sort Key: n_int
 Sort Method: quicksort Memory: 27kB
 -> Seq Scan on t_sort (cost=0.00..1.54 rows=54 width=12) (actual time=0.021..0.032 rows=54 loops=1)
 Planning time: 0.161 ms
 Execution time: 0.104 ms
(6 rows)
--堆排序 top-N heapsort
abase=# explain analyze select ctid,n_int,c_id from t_sort order by n_int asc limit 10;
             QUERY PLAN             
------------------------------------------------------------
 Limit (cost=2.71..2.73 rows=10 width=12) (actual time=0.066..0.068 rows=10 loops=1)
 -> Sort (cost=2.71..2.84 rows=54 width=12) (actual time=0.065..0.066 rows=10 loops=1)
   Sort Key: n_int
   Sort Method: top-N heapsort Memory: 25kB
   -> Seq Scan on t_sort (cost=0.00..1.54 rows=54 width=12) (actual time=0.022..0.031 rows=54 loops=1)
 Planning time: 0.215 ms
 Execution time: 0.124 ms
(7 rows)
--归并排序 external sort Disk
--插入大量值为a的数据
abase=# insert into t_sort(n_int,c_id) select generate_series(1000,2000),'a';
INSERT 0 1001
abase=# set work_mem = '64kB';
SET
abase=# explain analyze select ctid,n_int,c_id from t_sort order by n_int asc;
             QUERY PLAN             
-------------------------------------------------------------
 Sort (cost=18.60..19.28 rows=270 width=12) (actual time=1.235..1.386 rows=1055 loops=1)
 Sort Key: n_int
 Sort Method: external sort Disk: 32kB
 -> Seq Scan on t_sort (cost=0.00..7.70 rows=270 width=12) (actual time=0.030..0.247 rows=1055 loops=1)
 Planning time: 0.198 ms
 Execution time: 1.663 ms
(6 rows)

快速排序

--快速排序
abase=# explain analyze select ctid,n_int,c_id from t_sort order by n_int asc;
            QUERY PLAN            
------------------------------------------------------------
 Sort (cost=3.09..3.23 rows=54 width=12) (actual time=0.058..0.061 rows=54 loops=1)
 Sort Key: n_int
 Sort Method: quicksort Memory: 27kB
 -> Seq Scan on t_sort (cost=0.00..1.54 rows=54 width=12) (actual time=0.021..0.032 rows=54 loops=1)
 Planning time: 0.161 ms
 Execution time: 0.104 ms
(6 rows) 
​
--获取前20条数据
 abase=# select ctid,n_int,c_id from t_sort order by n_int asc limit 20;
  ctid | n_int | c_id 
 --HJH[Z]LYYBKKKKKKKKKJKKKKKJKKKKCB
HLB

HL
B

HLB
JHLCB

JHL
CB
L

HL
B

MJHLB

L

NMJHLBLBBKyd#9h!n-[Z]LHd#c9bcLL9hyl9kBX\HX

XYY\\H\[Z]LHH[Z]LYYBKKKKKKKKKJKKKKKJKKKKCB

HL
B

HLB
JHLCB

JHL
CB
L

HL
B

MJHLB

L

NMJHLB


LHLBLOB]Ba#Y[y/odnmn&{9.c$yd#.$d#.-"ynBB/"y. 9{`+czghamy&y/&"za#{/a+9d#ghl,y.#y/&a#y.g*9.g!yfBBi9mLL9hyl9kod/9&y/o9aykf:/ od.#yyhi)`: %9aykf9.gi&BB]\HHBH\H\BKz+Y[HMBX\HY[NY[HBKKKKKKKKKCB
BHCBKy[Z]LLBX\H^Z[[[^HX

XYY\\H\[Z]LL
H[Z]LUQTHSBKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKCB[Z]
LLKLKMLLL
XX[[YOLLKL
LJCBI[Z]
LLKLK
LLL
XX[[YOLL
LHLJCBI
LLKLMKMLL
XX[[YOLL
L
JCBN\BY]SX\Y[[NZBIH[\
LKMMLL
XX[[YOL
NMJCB[[[YN\^X[[YNLM\BBKy[Z]LLBX\H^Z[[[^HX

XYY\\H\[Z]LL
H[Z]LUQTHSBKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKKCB[Z]
L
K
KLLL
XX[[YOLNNLLJCBI[Z]
L
K
KLLL
XX[[YOLN
NLJCBI
L
KLMLL
XX[[YOLMLLLJCBN\BY]^\[Y\\
BIH[\
LKMMLL
XX[[YOLMJCB[[[YN
\^X[[YN\OB]Bc"d9.9hb9b,L9&{/o9aykf9hb;ny!n$9.odnmn#9odnmn,n`9.#ghb!hmy.#y/&ay"yd#. :hmya9bcy. :hmyl9kxBBc NHYHX\KY\[L\\\X][[[Z][[H^\[\H[TH\X][SRUёBc NHYH\[Y\Y\[Z][[H^\[\H[SRU[ёB[[OH+[BBKal!hmza#yl9ke9..+y.#ye+m.%9iyb$-l9.o'!n Bod#yke#9/a+9蹦+mn&kf9g*9"za#yl9ke8Bod#ykenczghhmza#{f9hi)Y[y.#z-#/odnmn,y.#ykf9g*:a#yl9k.Bn9k)ldc:+yb$`"y#yd#9n#:/99.#y. 9-BKi!9a#yl9kn.:)b,y+n&yc*9nY\J9b9'd#ghb9."
9e+eiynB\H\KB[[OH.[B."l,y+9aj:`yky.n#9&ykyk)9ki.h9% !ymg9am 9kc h9./;,(,(i)/c.+ 
分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:1060120
帖子:212021
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP