PostgreSQL pgfincore
1 背景知识
PostgreSQL 的数据文件通常被分成1GB
的段文件,每个段文件都会被拆分为shared_buffer
内存的页面(一般8KB
),每个 8KB
都会被拆分为操作系统的块(4k)
。
pgfincore 扩展能够查看OS_CACHE
的磁盘块的数量。
1.1 pgfincore 扩展安装
1、编译和安装。
su - postgres
cd /soft
git clone git://git.postgresql.org/git/pgfincore.git
cd pgfincore/
make -j 8 && make install
2、加载插件。
psql -U postgres -d testdb
CREATE EXTENSION pgfincore;
1.2 pgfincore 扩展的函数语法说明
pgsysconf(OUT os_page_size bigint, OUT os_pages_free bigint,
OUT os_total_pages bigint)
RETURNS record
pgsysconf_pretty(OUT os_page_size text, OUT os_pages_free text,
OUT os_total_pages text)
RETURNS record
pgfadvise(IN relname regclass, IN fork text, IN action int,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_willneed(IN relname regclass,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_dontneed(IN relname regclass,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_normal(IN relname regclass,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_sequential(IN relname regclass,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_random(IN relname regclass,
OUT relpath text, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT os_pages_free bigint)
RETURNS setof record
pgfadvise_loader(IN relname regclass, IN fork text, IN segment int,
IN load bool, IN unload bool, IN databit varbit,
OUT relpath text, OUT os_page_size bigint,
OUT os_pages_free bigint, OUT pages_loaded bigint,
OUT pages_unloaded bigint)
RETURNS setof record
pgfadvise_loader(IN relname regclass, IN segment int,
IN load bool, IN unload bool, IN databit varbit,
OUT relpath text, OUT os_page_size bigint,
OUT os_pages_free bigint, OUT pages_loaded bigint,
OUT pages_unloaded bigint)
RETURNS setof record
pgfincore(IN relname regclass, IN fork text, IN getdatabit bool,
OUT relpath text, OUT segment int, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT pages_mem bigint,
OUT group_mem bigint, OUT os_pages_free bigint,
OUT databit varbit, OUT pages_dirty bigint,
OUT group_dirty bigint)
RETURNS setof record
pgfincore(IN relname regclass, IN getdatabit bool,
OUT relpath text, OUT segment int, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT pages_mem bigint,
OUT group_mem bigint, OUT os_pages_free bigint,
OUT databit varbit, OUT pages_dirty bigint,
OUT group_dirty bigint)
RETURNS setof record
pgfincore(IN relname regclass,
OUT relpath text, OUT segment int, OUT os_page_size bigint,
OUT rel_os_pages bigint, OUT pages_mem bigint,
OUT group_mem bigint, OUT os_pages_free bigint,
OUT databit varbit, OUT pages_dirty bigint,
OUT group_dirty bigint)
RETURNS setof record
2 获取对象的当前状态
SELECT * FROM pgfincore('public.t01');
//屏幕输出:
-[ RECORD 1 ]-+-------------
relpath | base/5/16993
segment | 0
os_page_size | 4096
rel_os_pages | 38364
pages_mem | 38364
group_mem | 1
os_pages_free | 1457702
databit |
pages_dirty | 0
group_dirty | 0
字段 | 说明 |
---|---|
relpath | 数据文件的相对路径。 |
segment | 段文件个数。从 0 开始。 |
os_page_size | 系统缓存的页面大小。单位:字节。 |
rel_os_pages | 对象的页面总数。 |
pages_mem | OS_CACHE 中对象的已缓存的页面数。 |
group_mem | 相邻pages_mem 的组数。 |
os_pages_free | OS_CACHE 可用的页面数。 |
pages_dirty | 脏页数量。 |
group_dirty | 相邻的脏页的组数。 |
3 将对象加载到OS CACHE 中
3.1 查看 public.film
对象状态
这里可以看出,当前 film 未缓存到 OS CACHE
中。
\c - testdb
SELECT * FROM pgfincore('public.film');
//屏幕输出:
-[ RECORD 1 ]-+-----------------
relpath | base/16384/16533
segment | 0
os_page_size | 4096
rel_os_pages | 110
pages_mem | 0
group_mem | 0
os_pages_free | 1458587
databit |
pages_dirty | 0
group_dirty | 0
3.2 缓存 public.film
到 OS CACHE
SELECT * FROM pgfadvise_willneed('public.film');
//屏幕输出:
relpath | os_page_size | rel_os_pages | os_pages_free
------------------+--------------+--------------+---------------
base/16384/16533 | 4096 | 110 | 1458498
(1 row)
字段 | 说明 |
---|---|
os_page_size | 内存页面大小。 |
rel_os_pages | 数据文件页面个数。 |
os_pages_free | os cache 的剩余空间。 |
3.3 查看 public.film
对象状态
public.film
已缓存
SELECT * FROM pgfincore('public.film');
-[ RECORD 1 ]-+-----------------
relpath | base/16384/16533
segment | 0
os_page_size | 4096
rel_os_pages | 110
pages_mem | 110
group_mem | 1
os_pages_free | 1458455
databit |
pages_dirty | 0
group_dirty | 0
4 使用快照快速还原操作系统页面缓冲区状态
当您需要重启服务器时,由于 shared_buffers
和 OS CACHE
已被清空。所以前期查询可能会变慢。
可以通过快照的备份和回复快速恢复数据库的缓存状态。
4.1 制作快照
CREATE TABLE pgfincore_snapshot as
select 'public.film'::text as relname,*,now() as date_snapshot
from pgfincore('public.film',true);
//屏幕输出:
SELECT 1
4.2 还原快照
SELECT * FROM pgfadvise_loader('public.film', 0, true, true,
(select databit from pgfincore_snapshot
where relname='public.film' and segment = 0));
//屏幕输出:
-[ RECORD 1 ]--+-----------------
relpath | base/16384/16533
os_page_size | 4096
os_pages_free | 1458249
pages_loaded | 110
pages_unloaded | 0
字段 | 说明 |
---|---|
pages_loaded | 已从 OS CACHE 中读取的页面数。 |
pages_unloaded | 已从 OS CACHE 中删除的页面数。 |
5 常用函数示例
5.1 PGSYSCONF
此函数输出:操作系统块大小,操作系统页面缓冲区中的可用页面数。
SELECT * FROM pgsysconf();
//屏幕输出:
os_page_size | os_pages_free | os_total_pages
--------------+---------------+----------------
4096 | 1458259 | 1970050
(1 row)
5.2 pgsysconf_pretty
输出同上,美化格式输出。
SELECT * FROM pgsysconf_pretty();
//屏幕输出:
os_page_size | os_pages_free | os_total_pages
--------------+---------------+----------------
4096 bytes | 5696 MB | 7696 MB
(1 row)
5.3 pgfadvise_willneed
1、此函数在当前关系上设置 WILLNEED 标志。
2、将 public.film
对象尽可能全部加载到 OS CACHE
中,使用此方法可以提高缓冲命中率。
SELECT * FROM pgfadvise_willneed('public.film');
//屏幕输出:
relpath | os_page_size | rel_os_pages | os_pages_free
------------------+--------------+--------------+---------------
base/16384/16533 | 4096 | 110 | 1459334
(1 row)
SELECT * FROM pgfincore('public.film');
//屏幕输出:
-[ RECORD 1 ]-+-----------------
relpath | base/16384/16533
segment | 0
os_page_size | 4096
rel_os_pages | 110
pages_mem | 110
group_mem | 1
os_pages_free | 1457354
databit |
pages_dirty | 0
group_dirty | 0
5.4 pgfadvise_dontneed
1、此函数在当前对象上设置 DONTNEED 标志。
2、将 public.film
对象尽可能全部从 OS CACHE
卸载。
SELECT * FROM pgfadvise_dontneed('public.film');
//屏幕输出:
relpath | os_page_size | rel_os_pages | os_pages_free
------------------+--------------+--------------+---------------
base/16384/16533 | 4096 | 110 | 1459334
(1 row)
//屏幕输出:
SELECT * FROM pgfincore('public.film');
-[ RECORD 1 ]-+-----------------
relpath | base/16384/16533
segment | 0
os_page_size | 4096
rel_os_pages | 110
pages_mem | 0
group_mem | 0
os_pages_free | 1457497
databit |
pages_dirty | 0
group_dirty | 0
5.5 pgfadvise_normal
此函数在当前关系上设置 NORMAL 标志。
5.6 pgfadvise_sequential
此函数在当前关系上设置_顺序_标志。
5.7 pgfadvise_RANDOM
此函数在当前关系上设置 RANDOM 标志。
5.8 pgfadvise_loader
此函数能够直接操作 OS CACHE
页面的。
1、装载内存页面。
2、卸载内存页面。
-- Loading and Unloading
SELECT * FROM pgfadvise_loader('pgbench_accounts', 0, true, true, B'111000');
relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded
------------------+--------------+---------------+--------------+----------------
base/11874/16447 | 4096 | 408376 | 3 | 3
-- Loading
SELECT * FROM pgfadvise_loader('pgbench_accounts', 0, true, false, B'111000');
relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded
------------------+--------------+---------------+--------------+----------------
base/11874/16447 | 4096 | 408370 | 3 | 0
-- Unloading
SELECT * FROM pgfadvise_loader('pgbench_accounts', 0, false, true, B'111000');
relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded
------------------+--------------+---------------+--------------+----------------
base/11874/16447 | 4096 | 408370 | 0 | 3