标签
Postgresql,ECS,阿里云,部署,物理镜像,流复制,快照备份,备份验证,自动清理
背景
介绍在阿里云ECS环境中,实现一个非常简单,但是可用性和可靠性满足一般企业要求的Postgresql环境。
包括:
2、包括一个物理流复制备库
6、监控请自建
部署环境介绍
1、ECS
111.111.111.199 (主) 111.111.111.223 (备)
2、云盘
400 GB
3、软件
OS: CentOS 7.4 x64 Postgresql: 10.1 ZFS: 0.7.3
部署流程
双机
1、安装软件
yum -y install coreutils glib2 lrzsz dstat sysstat e4fsprogs xfsprogs ntp readline-devel zlib-devel openssl-devel pam-devel libxml2-devel libxslt-devel python-devel tcl-devel gcc gcc-c++ make smartmontools flex bison perl-devel perl-ExtUtils* openldap-devel jadetex openjade bzip2
2、修改内核配置
vi /etc/sysctl.conf # add by digoal.zhou fs.aio-max-nr = 1048576 fs.file-max = 76724600 kernel.sem = 4096 2147483647 2147483646 512000 kernel.shmall = 107374182 kernel.shmmax = 274877906944 kernel.shmmni = 819200 net.core.netdev_max_backlog = 10000 net.core.rmem_default = 262144 # The default setting of the socket receive buffer in bytes. net.core.rmem_max = 4194304 # The maximum receive socket buffer size in bytes net.core.wmem_default = 262144 # The default setting (in bytes) of the socket send buffer. net.core.wmem_max = 4194304 # The maximum send socket buffer size in bytes. net.core.somaxconn = 4096 net.ipv4.tcp_max_syn_backlog = 4096 net.ipv4.tcp_keepalive_intvl = 20 net.ipv4.tcp_keepalive_probes = 3 net.ipv4.tcp_keepalive_time = 60 net.ipv4.tcp_mem = 8388608 12582912 16777216 net.ipv4.tcp_fin_timeout = 5 net.ipv4.tcp_synack_retries = 2 net.ipv4.tcp_syncookies = 1 net.ipv4.tcp_timestamps = 1 net.ipv4.tcp_tw_recycle = 0 net.ipv4.tcp_tw_reuse = 1 net.ipv4.tcp_max_tw_buckets = 262144 net.ipv4.tcp_rmem = 8192 87380 16777216 net.ipv4.tcp_wmem = 8192 65536 16777216 net.nf_conntrack_max = 1200000 net.netfilter.nf_conntrack_max = 1200000 vm.dirty_background_bytes = 204800000 vm.dirty_expire_centisecs = 3000 vm.dirty_ratio = 90 vm.dirty_writeback_centisecs = 50 vm.swappiness = 0 vm.mmap_min_addr = 65536 vm.overcommit_memory = 0 vm.overcommit_ratio = 90 vm.swappiness = 0 vm.zone_reclaim_mode = 0 net.ipv4.ip_local_port_range = 40000 65535 fs.nr_open=20480000
3、修改资源限制
# vi /etc/security/limits.conf * soft nofile 1024000 * hard nofile 1024000 * soft nproc unlimited * hard nproc unlimited * soft core unlimited * hard core unlimited * soft memlock unlimited * hard memlock unlimited
4、关闭透明大页
chmod +x /etc/rc.d/rc.local vi /etc/rc.local # 追加 if test -f /sys/kernel/mm/transparent_hugepage/enabled; then echo never > /sys/kernel/mm/transparent_hugepage/enabled fi
5、分配磁盘
parted -s /dev/vdb mklabel gpt parted -s /dev/vdb mkpart primary 1MiB 100%
主机
1、创建文件系统
mkfs.ext4 /dev/vdb1 -m 0 -O extent,uninit_bg -E lazy_itable_init=1 -T largefile -L data01
2、加载文件系统
mkdir /data01 vi /etc/fstab LABEL=data01 /data01 ext4 defaults,noatime,nodiratime,nodelalloc,barrier=0,data=writeback 0 0 mount -a
备机
1、安装ZFS
yum install -y http://download.zfsonlinux.org/epel/zfs-release.el7_4.noarch.rpm yum install -y kernel-devel-3.10.0-693.2.2.el7.x86_64 yum install -y zfs
2、手动加载zfs模块
/usr/sbin/modprobe zfs
3、创建zpool和zfs for 数据目录,归档目录
zpool create -o ashift=12 zp1 vdb1 zfs set compression=lz4 zp1 zfs set canmount=off zp1 zfs set atime=off zp1 zfs create -o mountpoint=/data01 zp1/data01 zfs create -o mountpoint=/pg_arch zp1/pg_arch
4、自动加载zpool
vi /etc/rc.local /usr/sbin/modprobe zfs /usr/sbin/zpool import zp1
双机
1、安装Postgresql与EPEL软件 yum 仓库
yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm yum install -y https://download.postgresql.org/pub/repos/yum/10/redhat/rhel-7-x86_64/pgdg-centos10-10-2.noarch.rpm
2、安装数据库软件与周边软件
yum install -y postgresql10* postgis24_10* pgrouting_10* osm2pgrouting_10* plpgsql_check_10* pgbadger pg_top10* hypopg_10* citus_10* cstore_fdw_10* pg_pathman10* orafce10* pgfincore10* pgbson10* pgmemcache-10* pldebugger10* plv8_10* geoip10* ip4r10* pg_repack10* pgsphere10* plr10*
3、修改环境变量
su - postgres vi ~/.bash_profile export PS1="$USER@`/bin/hostname -s`-> " export PGPORT=3433 export PGDATA=/data01/pg_root$PGPORT export LANG=en_US.utf8 export PGHOME=/usr/pgsql-10 export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH export DATE=`date +"%Y%m%d%H%M"` export PATH=$PGHOME/bin:$PATH:. export MANPATH=$PGHOME/share/man:$MANPATH export PGHOST=$PGDATA export PGUSER=postgres export PGDATABASE=postgres alias rm='rm -i' alias ll='ls -lh' unalias vi
1、创建数据目录,修改权限
cd /data01
mkdir pg_root3433
mkdir -p pg_arch/pg3433
chown postgres:postgres /data01/*
ln -s /data01/pg_arch /pg_arch
cd /data01
mkdir pg_root3433
chown postgres:postgres /data01/*
cd /pg_arch
mkdir pg3433
chown postgres:postgres /pg_arch/pg3433
1、初始化数据库集群
su - postgres
initdb -D $PGDATA -U postgres --locale=en_US.UTF8 -E UTF8
cd $PGDATA
vi postgresql.auto.conf
listen_addresses = '0.0.0.0'
port = 3433 # 监听端口
max_connections = 2000 # 最大允许的连接数
superuser_reserved_connections = 10
unix_socket_directories = '.'
unix_socket_permissions = 0700
tcp_keepalives_idle = 60
tcp_keepalives_interval = 60
tcp_keepalives_count = 10
shared_buffers = 16GB # 共享内存,建议设置为系统内存的1/4 .
maintenance_work_mem = 512MB # 系统内存超过32G时,建议设置为1GB。超过64GB时,建议设置为2GB。超过128GB时,建议设置为4GB。
work_mem = 64MB # 1/4 主机内存 / 256 (假设256个并发同时使用work_mem)
wal_buffers = 128MB # min( 2047MB,shared_buffers/32 )
dynamic_shared_memory_type = posix
vacuum_cost_delay = 0
bgwriter_delay = 10ms
bgwriter_lru_maxpages = 500
bgwriter_lru_multiplier = 5.0
effective_io_concurrency = 0
max_worker_processes = 128
max_parallel_workers_per_gather = 16 # 建议设置为主机cpu核数的一半。
max_parallel_workers = 16 # 看业务AP和TP的比例,以及AP TP时间交错分配。实际情况调整。例如 主机cpu cores-2
wal_level = replica
fsync = on
synchronous_commit = off
full_page_writes = on # 支持原子写超过BLOCK_SIZE的块设备,在对齐后可以关闭。或者支持cow的文件系统可以关闭。
wal_writer_delay = 10ms
wal_writer_flush_after = 1MB
checkpoint_timeout = 30min
max_wal_size = 32GB # shared_buffers*2
min_wal_size = 8GB # max_wal_size/4
archive_mode = always
archive_command = '/bin/date'
hot_standby = on
max_wal_senders = 10
max_replication_slots = 10
wal_receiver_status_interval = 1s
max_logical_replication_workers = 4
max_sync_workers_per_subscription = 2
random_page_cost = 1.2
parallel_tuple_cost = 0.1
parallel_setup_cost = 1000.0
min_parallel_table_scan_size = 8MB
min_parallel_index_scan_size = 512kB
effective_cache_size = 32GB # 建议设置为主机内存的5/8。
log_destination = 'csvlog'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%a.log'
log_truncate_on_rotation = on
log_rotation_age = 1d
log_rotation_size = 0
log_min_duration_statement = 5s
log_checkpoints = on
log_connections = on # 如果是短连接,并且不需要审计连接日志的话,建议OFF。
log_disconnections = on # 如果是短连接,并且不需要审计连接日志的话,建议OFF。
log_error_verbosity = verbose
log_line_prefix = '%m [%p] '
log_lock_waits = on
log_statement = 'ddl'
log_timezone = 'PRC'
log_autovacuum_min_duration = 0
autovacuum_max_workers = 5
autovacuum_vacuum_scale_factor = 0.1
autovacuum_analyze_scale_factor = 0.05
autovacuum_freeze_max_age = 1000000000
autovacuum_multixact_freeze_max_age = 1200000000
autovacuum_vacuum_cost_delay = 0
statement_timeout = 0 # 单位ms,s,min,h,d. 表示语句的超时时间,0表示不限制。
lock_timeout = 0 # 单位ms,d. 表示锁等待的超时时间,0表示不限制。
idle_in_transaction_session_timeout = 2h # 单位ms,d. 表示空闲事务的超时时间,0表示不限制。
vacuum_freeze_min_age = 50000000
vacuum_freeze_table_age = 800000000
vacuum_multixact_freeze_min_age = 50000000
vacuum_multixact_freeze_table_age = 800000000
datestyle = 'iso,ymd'
timezone = 'PRC'
lc_messages = 'en_US.UTF8'
lc_monetary = 'en_US.UTF8'
lc_numeric = 'en_US.UTF8'
lc_time = 'en_US.UTF8'
default_text_search_config = 'pg_catalog.simple'
shared_preload_libraries='pg_stat_statements,pg_pathman'
cp $PGHOME/share/recovery.conf.sample ./
mv recovery.conf.sample recovery.done
vi recovery.done
recovery_target_timeline = 'latest'
standby_mode = on
primary_conninfo = 'host=111.111.111.223 port=3433 user=rep password=xxxxxxxxxx'
restore_command = 'BASEDIR="/pg_arch/pg3433" ; find $BASEDIR -name %f -exec cp {} %p \\;'
3、修改数据库认证权限访问控制ACL,pg_hba.conf
vi pg_hba.conf
host all all 0.0.0.0/0 md5
host replication rep 0.0.0.0/0 md5
4、启动数据库
pg_ctl start
5、创建流复制,数据库角色
psql -n
create role rep replication login encrypted password 'xxxxxxxxxx';
1、创建备库
su - postgres
pg_basebackup -D $PGDATA -F p -h 111.111.111.199 -p 3433 -U rep
cd $PGDATA
vi postgresql.auto.conf
archive_command = 'DT="/pg_arch/pg3433/`date +%F`" ; test ! -d $DT && mkdir -p $DT ; test ! -f $DT/%f && cp %p $DT/%f'
mv recovery.done recovery.conf
vi recovery.conf
primary_conninfo = 'host=111.111.111.199 port=3433 user=rep password=xxxxxxxxxx'
5、修改数据目录权限
su - root
chmod 700 /data01/pg_root3433
1、系统启动时,自动启动数据库
vi /etc/rc.local
# 追加
su - postgres -c "pg_ctl start"
2、重启ECS验证
reboot
su - postgres
psql
postgres=# show max_connections ;
max_connections
-----------------
2000
(1 row)
1、配置@R_301_350@(快照备份)
su - root
mkdir script
vi /root/script/zfs_snap.sh
#!/bin/bash
date +%F%T
# 自动创建、清理快照
/sbin/zfs snapshot zp1/data01@`date +%Y%m%d`
/sbin/zfs destroy zp1/data01@`date +%Y%m%d -d '-10 day'`
/sbin/zfs list -t snapshot
# 自动清理归档
rm -rf /pg_arch/pg3433/`date +%F -d '-11 day'`
date +%F%T
2、设置脚本权限
chmod 500 /root/script/zfs_snap.sh
3、设置@R_301_350@任务,每天备份一次
# crontab -e
1 8 * * * /root/script/zfs_snap.sh >> /root/zfs_snap.log 2>&1
1、初始化数据库集群
su - postgres
initdb -D $PGDATA -U postgres --locale=en_US.UTF8 -E UTF8
cd $PGDATA vi postgresql.auto.conf listen_addresses = '0.0.0.0' port = 3433 # 监听端口 max_connections = 2000 # 最大允许的连接数 superuser_reserved_connections = 10 unix_socket_directories = '.' unix_socket_permissions = 0700 tcp_keepalives_idle = 60 tcp_keepalives_interval = 60 tcp_keepalives_count = 10 shared_buffers = 16GB # 共享内存,建议设置为系统内存的1/4 . maintenance_work_mem = 512MB # 系统内存超过32G时,建议设置为1GB。超过64GB时,建议设置为2GB。超过128GB时,建议设置为4GB。 work_mem = 64MB # 1/4 主机内存 / 256 (假设256个并发同时使用work_mem) wal_buffers = 128MB # min( 2047MB,shared_buffers/32 ) dynamic_shared_memory_type = posix vacuum_cost_delay = 0 bgwriter_delay = 10ms bgwriter_lru_maxpages = 500 bgwriter_lru_multiplier = 5.0 effective_io_concurrency = 0 max_worker_processes = 128 max_parallel_workers_per_gather = 16 # 建议设置为主机cpu核数的一半。 max_parallel_workers = 16 # 看业务AP和TP的比例,以及AP TP时间交错分配。实际情况调整。例如 主机cpu cores-2 wal_level = replica fsync = on synchronous_commit = off full_page_writes = on # 支持原子写超过BLOCK_SIZE的块设备,在对齐后可以关闭。或者支持cow的文件系统可以关闭。 wal_writer_delay = 10ms wal_writer_flush_after = 1MB checkpoint_timeout = 30min max_wal_size = 32GB # shared_buffers*2 min_wal_size = 8GB # max_wal_size/4 archive_mode = always archive_command = '/bin/date' hot_standby = on max_wal_senders = 10 max_replication_slots = 10 wal_receiver_status_interval = 1s max_logical_replication_workers = 4 max_sync_workers_per_subscription = 2 random_page_cost = 1.2 parallel_tuple_cost = 0.1 parallel_setup_cost = 1000.0 min_parallel_table_scan_size = 8MB min_parallel_index_scan_size = 512kB effective_cache_size = 32GB # 建议设置为主机内存的5/8。 log_destination = 'csvlog' logging_collector = on log_directory = 'log' log_filename = 'postgresql-%a.log' log_truncate_on_rotation = on log_rotation_age = 1d log_rotation_size = 0 log_min_duration_statement = 5s log_checkpoints = on log_connections = on # 如果是短连接,并且不需要审计连接日志的话,建议OFF。 log_disconnections = on # 如果是短连接,并且不需要审计连接日志的话,建议OFF。 log_error_verbosity = verbose log_line_prefix = '%m [%p] ' log_lock_waits = on log_statement = 'ddl' log_timezone = 'PRC' log_autovacuum_min_duration = 0 autovacuum_max_workers = 5 autovacuum_vacuum_scale_factor = 0.1 autovacuum_analyze_scale_factor = 0.05 autovacuum_freeze_max_age = 1000000000 autovacuum_multixact_freeze_max_age = 1200000000 autovacuum_vacuum_cost_delay = 0 statement_timeout = 0 # 单位ms,s,min,h,d. 表示语句的超时时间,0表示不限制。 lock_timeout = 0 # 单位ms,d. 表示锁等待的超时时间,0表示不限制。 idle_in_transaction_session_timeout = 2h # 单位ms,d. 表示空闲事务的超时时间,0表示不限制。 vacuum_freeze_min_age = 50000000 vacuum_freeze_table_age = 800000000 vacuum_multixact_freeze_min_age = 50000000 vacuum_multixact_freeze_table_age = 800000000 datestyle = 'iso,ymd' timezone = 'PRC' lc_messages = 'en_US.UTF8' lc_monetary = 'en_US.UTF8' lc_numeric = 'en_US.UTF8' lc_time = 'en_US.UTF8' default_text_search_config = 'pg_catalog.simple' shared_preload_libraries='pg_stat_statements,pg_pathman'
cp $PGHOME/share/recovery.conf.sample ./ mv recovery.conf.sample recovery.done vi recovery.done recovery_target_timeline = 'latest' standby_mode = on primary_conninfo = 'host=111.111.111.223 port=3433 user=rep password=xxxxxxxxxx' restore_command = 'BASEDIR="/pg_arch/pg3433" ; find $BASEDIR -name %f -exec cp {} %p \\;'
3、修改数据库认证权限访问控制ACL,pg_hba.conf
vi pg_hba.conf host all all 0.0.0.0/0 md5 host replication rep 0.0.0.0/0 md5
4、启动数据库
pg_ctl start
5、创建流复制,数据库角色
psql -n create role rep replication login encrypted password 'xxxxxxxxxx';
1、创建备库
su - postgres
pg_basebackup -D $PGDATA -F p -h 111.111.111.199 -p 3433 -U rep
cd $PGDATA
vi postgresql.auto.conf
archive_command = 'DT="/pg_arch/pg3433/`date +%F`" ; test ! -d $DT && mkdir -p $DT ; test ! -f $DT/%f && cp %p $DT/%f'
mv recovery.done recovery.conf
vi recovery.conf
primary_conninfo = 'host=111.111.111.199 port=3433 user=rep password=xxxxxxxxxx'
5、修改数据目录权限
su - root
chmod 700 /data01/pg_root3433
1、系统启动时,自动启动数据库
vi /etc/rc.local
# 追加
su - postgres -c "pg_ctl start"
2、重启ECS验证
reboot
su - postgres
psql
postgres=# show max_connections ;
max_connections
-----------------
2000
(1 row)
1、配置@R_301_350@(快照备份)
su - root
mkdir script
vi /root/script/zfs_snap.sh
#!/bin/bash
date +%F%T
# 自动创建、清理快照
/sbin/zfs snapshot zp1/data01@`date +%Y%m%d`
/sbin/zfs destroy zp1/data01@`date +%Y%m%d -d '-10 day'`
/sbin/zfs list -t snapshot
# 自动清理归档
rm -rf /pg_arch/pg3433/`date +%F -d '-11 day'`
date +%F%T
2、设置脚本权限
chmod 500 /root/script/zfs_snap.sh
3、设置@R_301_350@任务,每天备份一次
# crontab -e
1 8 * * * /root/script/zfs_snap.sh >> /root/zfs_snap.log 2>&1
vi /etc/rc.local # 追加 su - postgres -c "pg_ctl start"
su - root mkdir script vi /root/script/zfs_snap.sh #!/bin/bash date +%F%T # 自动创建、清理快照 /sbin/zfs snapshot zp1/data01@`date +%Y%m%d` /sbin/zfs destroy zp1/data01@`date +%Y%m%d -d '-10 day'` /sbin/zfs list -t snapshot # 自动清理归档 rm -rf /pg_arch/pg3433/`date +%F -d '-11 day'` date +%F%T