postgresql 优化之--不会使用索引

--原始表结构如下：
postgres=# \d+ postgres_s1_log
                                       Table "public.postgres_s1_log"
         Column         |              Type              | Modifiers | Storage  | Stats target | Description 
------------------------+--------------------------------+-----------+----------+--------------+-------------
 log_time               | timestamp(3) without time zone |           | plain    |              | 
 user_name              | text                           |           | extended |              | 
 database_name          | text                           |           | extended |              | 
 process_id             | integer                        |           | plain    |              | 
 connection_from        | text                           |           | extended |              | 
 session_id             | text                           | not null  | extended |              | 
 session_line_num       | bigint                         | not null  | plain    |              | 
 command_tag            | text                           |           | extended |              | 
 session_start_time     | timestamp without time zone    |           | plain    |              | 
 virtual_transaction_id | text                           |           | extended |              | 
 transaction_id         | bigint                         |           | plain    |              | 
 error_severity         | text                           |           | extended |              | 
 sql_state_code         | text                           |           | extended |              | 
 message                | text                           |           | extended |              | 
 detail                 | text                           |           | extended |              | 
 hint                   | text                           |           | extended |              | 
 internal_query         | text                           |           | extended |              | 
 internal_query_pos     | integer                        |           | plain    |              | 
 context                | text                           |           | extended |              | 
 query                  | text                           |           | extended |              | 
 query_pos              | integer                        |           | plain    |              | 
 location               | text                           |           | extended |              | 
 application_name       | text                           |           | extended |              | 
Indexes:
    "pk_postgres_s1_log" PRIMARY KEY,btree (session_id,session_line_num)
    "idx_error_severity" btree (error_severity)
Has OIDs: no




--原始sql
postgres=# explain 
postgres-# select t1.database_name 数据库名,t1.user_name 用户名,cast(substring(t1.message from  position(':' in t1.message)+2  for  position('ms' in t1.message)-position(':' in t1.message)-2)  as float) 执行时间,t2.message  语句,t2.detail  参数,t1.log_time 执行时间,left(t1.connection_from,position(':' in t1.connection_from)-1)   来源IP  
postgres-# from postgres_s1_log t1,postgres_s1_log t2 where   t1.message  like 'duration:%'  and   t1.user_name<>'postgres'  and  t1.database_name='dev' and cast(substring(t1.message from  position(':' in t1.message)+2  for  position('ms' in t1.message)-position(':' in t1.message)-2)  as float)>5000
postgres-# and t1.session_id=t2.session_id and t1.session_line_num=t2.session_line_num+1  order by  3 desc limit 500;
                                                                                                                                                               QUERY PLAN                                                                                                     
                                                           
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------
 Limit  (cost=820519.37..820520.62 rows=500 width=280)
   ->  Sort  (cost=820519.37..820640.72 rows=48538 width=280)
         Sort Key: (("substring"(t1.message,("position"(t1.message,':'::text) + 2),(("position"(t1.message,'ms'::text) - "position"(t1.message,':'::text)) - 2)))::double precision)
         ->  Merge Join  (cost=797111.93..818100.77 rows=48538 width=280)
               Merge Cond: ((t2.session_id = t1.session_id) AND (((t2.session_line_num + 1)) = t1.session_line_num))
               ->  Sort  (cost=609126.84..612760.01 rows=1453271 width=188)
                     Sort Key: t2.session_id,((t2.session_line_num + 1))
                     ->  Seq Scan on postgres_s1_log t2  (cost=0.00..58028.71 rows=1453271 width=188)
               ->  Materialize  (cost=187985.10..189790.94 rows=361169 width=136)
                     ->  Sort  (cost=187985.10..188888.02 rows=361169 width=136)
                           Sort Key: t1.session_id,t1.session_line_num
                           ->  Seq Scan on postgres_s1_log t1  (cost=0.00..105260.02 rows=361169 width=136)
                                 Filter: ((message ~~ 'duration:%'::text) AND (user_name <> 'postgres'::text) AND (database_name = 'dev'::text) AND (("substring"(message,("position"(message,(("position"(message,'ms'::text) - "position"(message,':'::
text)) - 2)))::double precision > 5000::double precision))
--阅读sql可知，该sql是一个自边接，对t1执行主要的过滤，对t2而言依据t1的结果执行过滤取值，且有组合索引 btree (session_id,session_line_num) ，但由于t1.session_line_num=t2.session_line_num+1 至使t2最多只能使用一部分索引




--修改条件 t1.session_line_num=t2.session_line_num+1 为t1.session_line_num -1 =t2.session_line_num，发现t2还是没有使用索引，
--但由于t1与t2进行join时并没有全部使用过滤条件，注意 Seq Scan on postgres_s1_log t1  (cost=0.00..105260.02 rows=361169 width=136)，可发现t1与t2 join，时，pg估计有36万行数据估执行hash join也是正常的，但此时执行计划已发生变化，
postgres=# explain 
postgres-# select t1.database_name 数据库名,postgres_s1_log t2 where   t1.message  like 'duration:%'  and   t1.user_name<>'postgres'  and  t1.database_name='dev' and cast(substring(t1.message from  position(':' in t1.message)+2  for  position('ms' in t1.message)-position(':' in t1.message)-2)  as float)>5000
postgres-# and t1.session_id=t2.session_id and t1.session_line_num -1 =t2.session_line_num   order by  3 desc limit 500;
                                                                                                                                                         QUERY PLAN                                                                                                           
                                               
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------
 Limit  (cost=807502.65..807503.90 rows=500 width=280)
   ->  Sort  (cost=807502.65..807623.99 rows=48538 width=280)
         Sort Key: (("substring"(t1.message,':'::text)) - 2)))::double precision)
         ->  Hash Join  (cost=118146.77..805084.05 rows=48538 width=280)
               Hash Cond: ((t1.session_id = t2.session_id) AND ((t1.session_line_num - 1) = t2.session_line_num))
               ->  Seq Scan on postgres_s1_log t1  (cost=0.00..105260.02 rows=361169 width=136)
                     Filter: ((message ~~ 'duration:%'::text) AND (user_name <> 'postgres'::text) AND (database_name = 'dev'::text) AND (("substring"(message,':'::text)) - 2))
)::double precision > 5000::double precision))
               ->  Hash  (cost=58028.71..58028.71 rows=1453271 width=188)
                     ->  Seq Scan on postgres_s1_log t2  (cost=0.00..58028.71 rows=1453271 width=188)

					 
					 

--既然已知t1,最多只是返回500条数据，则完全可以让t1先执行过滤再与t2进行join，且t2可以使用索引
postgres=# explain
postgres-# with w1 as (
postgres(# select database_name 数据库名,user_name 用户名,cast(substring(message from  position(':' in message)+2  for  position('ms' in message)-position(':' in message)-2)  as float) 执行用时,log_time 执行时间,left(connection_from,position(':' in connection_from)-1) 来源IP,session_id,session_line_num 
postgres(# from postgres_s1_log where   message  like 'duration:%'  and   user_name<>'postgres'  and  database_name='dev'
postgres(# and  cast(substring(message from  position(':' in message)+2  for  position('ms' in message)-position(':' in message)-2)  as float)>5000  order by  3 desc limit 500)
postgres-# select 数据库名,用户名,执行用时,执行时间,来源IP from w1,postgres_s1_log t2 where w1.session_id=t2.session_id and w1.session_line_num -1 =t2.session_line_num;
                                                                                                                                                          QUERY PLAN                                                                                                          
                                                 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------
 Nested Loop  (cost=134093.43..138292.75 rows=13 width=278)
   CTE w1
     ->  Limit  (cost=134091.75..134093.00 rows=500 width=136)
           ->  Sort  (cost=134091.75..134994.67 rows=361169 width=136)
                 Sort Key: (("substring"(postgres_s1_log.message,("position"(postgres_s1_log.message,(("position"(postgres_s1_log.message,'ms'::text) - "position"(postgres_s1_log.message,':'::text)) - 2)))::double precision)
                 ->  Seq Scan on postgres_s1_log  (cost=0.00..116095.09 rows=361169 width=136)
                       Filter: ((message ~~ 'duration:%'::text) AND (user_name <> 'postgres'::text) AND (database_name = 'dev'::text) AND (("substring"(message,':'::text)) - 2
)))::double precision > 5000::double precision))
   ->  CTE Scan on w1  (cost=0.00..10.00 rows=500 width=152)
   ->  Index Scan using pk_postgres_s1_log on postgres_s1_log t2  (cost=0.43..8.37 rows=1 width=188)
         Index Cond: ((session_id = w1.session_id) AND (session_line_num = (w1.session_line_num - 1)))
(10 rows)


--能过原始的cost=807503.90到最后优化的cost=138292.75，性能大概提高了6倍，此时可发现系统的消耗主要在t1,后续需要继续对其进行优化
postgresql 优化之--不会使用索引

猜你在找的Postgre SQL相关文章