PostgreSQL table bloat check

Use the following script to estimate table bloat. This script requires up-to-date table statistics. Otherwise, the estimate is inaccurate.

  WITH 
  
 constants 
  
 AS 
  
 ( 
  
 -- define some constants for sizes of things 
  
 -- for reference down the query and easy maintenance 
  
 SELECT 
  
 current_setting 
 ( 
 'block_size' 
 ):: 
 numeric 
  
 AS 
  
 bs 
 , 
  
 23 
  
 AS 
  
 hdr 
 , 
  
 8 
  
 AS 
  
 ma 
 ), 
 no_stats 
  
 AS 
  
 ( 
  
 -- screen out table who have attributes 
  
 -- which don't have stats, such as JSON 
  
 SELECT 
  
 table_schema 
 , 
  
 table_name 
 , 
  
  
 n_live_tup 
 :: 
 numeric 
  
 as 
  
 est_rows 
 , 
  
 pg_table_size 
 ( 
 relid 
 ):: 
 numeric 
  
 as 
  
 table_size 
  
 FROM 
  
 information_schema 
 . 
 columns 
  
 JOIN 
  
 pg_stat_user_tables 
  
 as 
  
 psut 
  
 ON 
  
 table_schema 
  
 = 
  
 psut 
 . 
 schemaname 
  
 AND 
  
 table_name 
  
 = 
  
 psut 
 . 
 relname 
  
 LEFT 
  
 OUTER 
  
 JOIN 
  
 pg_stats 
  
 ON 
  
 table_schema 
  
 = 
  
 pg_stats 
 . 
 schemaname 
  
 AND 
  
 table_name 
  
 = 
  
 pg_stats 
 . 
 tablename 
  
 AND 
  
 column_name 
  
 = 
  
 attname 
  
  
 WHERE 
  
 attname 
  
 IS 
  
 NULL 
  
 AND 
  
 table_schema 
  
 NOT 
  
 IN 
  
 ( 
 'pg_catalog' 
 , 
  
 'information_schema' 
 ) 
  
 GROUP 
  
 BY 
  
 table_schema 
 , 
  
 table_name 
 , 
  
 relid 
 , 
  
 n_live_tup 
 ), 
 null_headers 
  
 AS 
  
 ( 
  
 -- calculate null header sizes 
  
 -- omitting tables which don't have complete stats 
  
 -- and attributes which aren't visible 
  
 SELECT 
  
 hdr 
 + 
 1 
 + 
 ( 
 sum 
 ( 
 case 
  
 when 
  
 null_frac 
 <> 
 0 
  
 THEN 
  
 1 
  
 else 
  
 0 
  
 END 
 ) 
 / 
 8 
 ) 
  
 as 
  
 nullhdr 
 , 
  
 SUM 
 (( 
 1 
 - 
 null_frac 
 ) 
 * 
 avg_width 
 ) 
  
 as 
  
 datawidth 
 , 
  
 MAX 
 ( 
 null_frac 
 ) 
  
 as 
  
 maxfracsum 
 , 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 hdr 
 , 
  
 ma 
 , 
  
 bs 
  
 FROM 
  
 pg_stats 
  
 CROSS 
  
 JOIN 
  
 constants 
  
 LEFT 
  
 OUTER 
  
 JOIN 
  
 no_stats 
  
 ON 
  
 schemaname 
  
 = 
  
 no_stats 
 . 
 table_schema 
  
 AND 
  
 tablename 
  
 = 
  
 no_stats 
 . 
 table_name 
  
 WHERE 
  
 schemaname 
  
 NOT 
  
 IN 
  
 ( 
 'pg_catalog' 
 , 
  
 'information_schema' 
 ) 
  
 AND 
  
 no_stats 
 . 
 table_name 
  
 IS 
  
 NULL 
  
 AND 
  
 EXISTS 
  
 ( 
  
 SELECT 
  
 1 
  
 FROM 
  
 information_schema 
 . 
 columns 
  
 WHERE 
  
 schemaname 
  
 = 
  
 columns 
 . 
 table_schema 
  
 AND 
  
 tablename 
  
 = 
  
 columns 
 . 
 table_name 
  
 ) 
  
 GROUP 
  
 BY 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 hdr 
 , 
  
 ma 
 , 
  
 bs 
 ), 
 data_headers 
  
 AS 
  
 ( 
  
 -- estimate header and row size 
  
 SELECT 
  
 ma 
 , 
  
 bs 
 , 
  
 hdr 
 , 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 ( 
 datawidth 
 + 
 ( 
 hdr 
 + 
 ma 
 - 
 ( 
 case 
  
 when 
  
 hdr 
 % 
 ma 
 = 
 0 
  
 THEN 
  
 ma 
  
 ELSE 
  
 hdr 
 % 
 ma 
  
 END 
 ))):: 
 numeric 
  
 AS 
  
 datahdr 
 , 
  
 ( 
 maxfracsum 
 * 
 ( 
 nullhdr 
 + 
 ma 
 - 
 ( 
 case 
  
 when 
  
 nullhdr 
 % 
 ma 
 = 
 0 
  
 THEN 
  
 ma 
  
 ELSE 
  
 nullhdr 
 % 
 ma 
  
 END 
 ))) 
  
 AS 
  
 nullhdr2 
  
 FROM 
  
 null_headers 
 ), 
 table_estimates 
  
 AS 
  
 ( 
  
 -- make estimates of how large the table should be 
  
 -- based on row and page size 
  
 SELECT 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 bs 
 , 
  
 reltuples 
 :: 
 numeric 
  
 as 
  
 est_rows 
 , 
  
 relpages 
  
 * 
  
 bs 
  
 as 
  
 table_bytes 
 , 
  
 CEIL 
 (( 
 reltuples 
 * 
  
 ( 
 datahdr 
  
 + 
  
 nullhdr2 
  
 + 
  
 4 
  
 + 
  
 ma 
  
 - 
  
 ( 
 CASE 
  
 WHEN 
  
 datahdr 
 % 
 ma 
 = 
 0 
  
 THEN 
  
 ma 
  
 ELSE 
  
 datahdr 
 % 
 ma 
  
 END 
 ) 
  
 ) 
 / 
 ( 
 bs 
 - 
 20 
 ))) 
  
 * 
  
 bs 
  
 AS 
  
 expected_bytes 
 , 
  
 reltoastrelid 
  
 FROM 
  
 data_headers 
  
 JOIN 
  
 pg_class 
  
 ON 
  
 tablename 
  
 = 
  
 relname 
  
 JOIN 
  
 pg_namespace 
  
 ON 
  
 relnamespace 
  
 = 
  
 pg_namespace 
 . 
 oid 
  
 AND 
  
 schemaname 
  
 = 
  
 nspname 
  
 WHERE 
  
 pg_class 
 . 
 relkind 
  
 = 
  
 'r' 
 ), 
 estimates_with_toast 
  
 AS 
  
 ( 
  
 -- add in estimated TOAST table sizes 
  
 -- estimate based on 4 toast tuples per page because we don't have 
  
 -- anything better.  also append the no_data tables 
  
 SELECT 
  
 schemaname 
 , 
  
 tablename 
 , 
  
  
 TRUE 
  
 as 
  
 can_estimate 
 , 
  
 est_rows 
 , 
  
 table_bytes 
  
 + 
  
 ( 
  
 coalesce 
 ( 
 toast 
 . 
 relpages 
 , 
  
 0 
 ) 
  
 * 
  
 bs 
  
 ) 
  
 as 
  
 table_bytes 
 , 
  
 expected_bytes 
  
 + 
  
 ( 
  
 ceil 
 ( 
  
 coalesce 
 ( 
 toast 
 . 
 reltuples 
 , 
  
 0 
 ) 
  
 / 
  
 4 
  
 ) 
  
 * 
  
 bs 
  
 ) 
  
 as 
  
 expected_bytes 
  
 FROM 
  
 table_estimates 
  
 LEFT 
  
 OUTER 
  
 JOIN 
  
 pg_class 
  
 as 
  
 toast 
  
 ON 
  
 table_estimates 
 . 
 reltoastrelid 
  
 = 
  
 toast 
 . 
 oid 
  
 AND 
  
 toast 
 . 
 relkind 
  
 = 
  
 't' 
 ), 
 table_estimates_plus 
  
 AS 
  
 ( 
 -- add some extra metadata to the table data 
 -- and calculations to be reused 
 -- including whether we can estimate it 
 -- or whether we think it might be compressed 
  
 SELECT 
  
 current_database 
 () 
  
 as 
  
 databasename 
 , 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 can_estimate 
 , 
  
  
 est_rows 
 , 
  
 CASE 
  
 WHEN 
  
 table_bytes 
 > 
 0 
  
 THEN 
  
 table_bytes 
 :: 
 NUMERIC 
  
 ELSE 
  
 NULL 
 :: 
 NUMERIC 
  
 END 
  
 AS 
  
 table_bytes 
 , 
  
 CASE 
  
 WHEN 
  
 expected_bytes 
 > 
 0 
  
  
 THEN 
  
 expected_bytes 
 :: 
 NUMERIC 
  
 ELSE 
  
 NULL 
 :: 
 NUMERIC 
  
 END 
  
 AS 
  
 expected_bytes 
 , 
  
 CASE 
  
 WHEN 
  
 expected_bytes 
 > 
 0 
  
 AND 
  
 table_bytes 
 > 
 0 
  
 AND 
  
 expected_bytes 
  
< = 
  
 table_bytes 
  
 THEN 
  
 ( 
 table_bytes 
  
 - 
  
 expected_bytes 
 ):: 
 NUMERIC 
  
 ELSE 
  
 0 
 :: 
 NUMERIC 
  
 END 
  
 AS 
  
 bloat_bytes 
  
 FROM 
  
 estimates_with_toast 
  
 UNION 
  
 ALL 
  
 SELECT 
  
 current_database 
 () 
  
 as 
  
 databasename 
 , 
  
  
 table_schema 
 , 
  
 table_name 
 , 
  
 FALSE 
 , 
  
  
 est_rows 
 , 
  
 table_size 
 , 
  
 NULL 
 :: 
 NUMERIC 
 , 
  
 NULL 
 :: 
 NUMERIC 
  
 FROM 
  
 no_stats 
 ), 
 bloat_data 
  
 AS 
  
 ( 
  
 -- do final math calculations and formatting 
  
 select 
  
 current_database 
 () 
  
 as 
  
 databasename 
 , 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 can_estimate 
 , 
  
  
 table_bytes 
 , 
  
 round 
 ( 
 table_bytes 
 / 
 ( 
 1024 
 ^ 
 2 
 ):: 
 NUMERIC 
 , 
 3 
 ) 
  
 as 
  
 table_mb 
 , 
  
 expected_bytes 
 , 
  
 round 
 ( 
 expected_bytes 
 / 
 ( 
 1024 
 ^ 
 2 
 ):: 
 NUMERIC 
 , 
 3 
 ) 
  
 as 
  
 expected_mb 
 , 
  
 round 
 ( 
 bloat_bytes 
 * 
 100 
 / 
 table_bytes 
 ) 
  
 as 
  
 pct_bloat 
 , 
  
 round 
 ( 
 bloat_bytes 
 / 
 ( 
 1024 
 :: 
 NUMERIC 
 ^ 
 2 
 ), 
 2 
 ) 
  
 as 
  
 mb_bloat 
 , 
  
 table_bytes 
 , 
  
 expected_bytes 
 , 
  
 est_rows 
  
 FROM 
  
 table_estimates_plus 
 ) 
 -- filter output for bloated tables 
 SELECT 
  
 databasename 
 , 
  
 schemaname 
 , 
  
 tablename 
 , 
  
 can_estimate 
 , 
  
 est_rows 
 , 
  
 pct_bloat 
 , 
  
 mb_bloat 
 , 
  
 table_mb 
 FROM 
  
 bloat_data 
 -- this where clause defines which tables actually appear 
 -- in the bloat chart 
 -- example below filters for tables which are either 50% 
 -- bloated and more than 20mb in size, or more than 25% 
 -- bloated and more than 4GB in size 
 WHERE 
  
 ( 
  
 pct_bloat 
  
> = 
  
 50 
  
 AND 
  
 mb_bloat 
  
> = 
  
 10 
  
 ) 
  
 OR 
  
 ( 
  
 pct_bloat 
  
> = 
  
 25 
  
 AND 
  
 mb_bloat 
  
> = 
  
 1000 
  
 ) 
 AND 
  
 tablename 
  
 = 
  
 '%' 
 ORDER 
  
 BY 
  
 mb_bloat 
  
 DESC 
  
 NULLS 
  
 LAST 
 ; 
 
Create a Mobile Website
View Site in Mobile | Classic
Share by: