Deploying a Ceph Nautilus 14.2.18 Cluster on CentOS 7
Hostname | OS | IP | Installed software |
ceph-admin | CentOS7.9 | 192.168.168.200 | ceph-deploy |
ceph-node1 | CentOS7.9 | 192.168.168.201 | ceph |
ceph-node2 | CentOS7.9 | 192.168.168.202 | ceph |
ceph-node3 | CentOS7.9 | 192.168.168.203 | ceph |
Environment preparation
Version information
[root@ceph-admin my-cluster]# cat /proc/version
Linux version 3.10.0-1062.4.1.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Fri Oct 18 17:15:30 UTC 2019
[root@ceph-admin my-cluster]# cat /etc/redhat-release
CentOS Linux release 7.9.2009 (Core)
Set hostnames and /etc/hosts
[root@ceph-admin ~]# cat>>/etc/hosts<<EOF
> 192.168.168.200 ceph-admin
> 192.168.168.201 ceph-node1
> 192.168.168.202 ceph-node2
> 192.168.168.203 ceph-node3
> EOF
[root@ceph-admin ~]# cat /etc/hosts | grep ceph
192.168.168.200 ceph-admin
192.168.168.201 ceph-node1
192.168.168.202 ceph-node2
192.168.168.203 ceph-node3
Configure the EPEL and Ceph repos (all nodes)
# Configure the Aliyun EPEL repo; back up any existing epel.repo first
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
# Configure the Aliyun Ceph repo
cat>/etc/yum.repos.d/ceph.repo<<EOF
[ceph-source]
name=Ceph source packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
[ceph-aarch64]
name=Ceph aarch64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/aarch64/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
[ceph-noarch]
name=Ceph noarch packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
[ceph-x86_64]
name=Ceph x86_64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
EOF
# Update and then list the configured repos
yum update
# After the update finishes, run:
yum repolist
# The epel and ceph repos should now be listed
# Build the metadata cache
yum makecache
Disable the firewall and SELinux (all nodes)
# Stop the firewall and keep it from starting at boot
systemctl stop firewalld
systemctl disable firewalld
# Turn off SELinux now and keep it disabled at boot
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/sysconfig/selinux
Configure time synchronization (all nodes)
Configure the time service on the admin node
# Set the time zone (all nodes)
timedatectl set-timezone Asia/Shanghai
cp /etc/chrony.conf /etc/chrony.conf.bak
# Switch to the Aliyun NTP servers
sed -i "/^server 0/cserver ntp.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 1/cserver ntp1.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 2/cserver ntp2.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 3/cserver ntp3.aliyun.com iburst" /etc/chrony.conf
# Add this host itself as a server
sed -i '/^# Please / a\server 127.0.0.1 iburst' /etc/chrony.conf
# Allow clients from this subnet
sed -i '/#allow / a\allow 192.168.168.0/24' /etc/chrony.conf
# Keep serving time even when this server is not synchronized to any other source
sed -i '/^#local / s/^#\(.*\)$/\1/g' /etc/chrony.conf
# Restart chronyd to apply the changes
systemctl restart chronyd
# Check synchronization; on a client, a '*' in the output means the source is in sync
chronyc sources
# Resulting configuration on the admin node
[root@ceph-admin ~]# cat /etc/chrony.conf
# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
server 127.0.0.1 iburst
server ntp.aliyun.com iburst
server ntp1.aliyun.com iburst
server ntp2.aliyun.com iburst
server ntp3.aliyun.com iburst
# Record the rate at which the system clock gains/losses time.
driftfile /var/lib/chrony/drift
# Allow the system clock to be stepped in the first three updates
# if its offset is larger than 1 second.
makestep 1.0 3
# Enable kernel synchronization of the real-time clock (RTC).
rtcsync
# Enable hardware timestamping on all interfaces that support it.
#hwtimestamp *
# Increase the minimum number of selectable sources required to adjust
# the system clock.
#minsources 2
# Allow NTP client access from local network.
#allow 192.168.0.0/16
allow 192.168.168.0/24
# Serve time even if not synchronized to a time source.
local stratum 10
# Specify file containing keys for NTP authentication.
#keyfile /etc/chrony.keys
Configuration on the other nodes
# Set the time zone (all nodes)
timedatectl set-timezone Asia/Shanghai
cp /etc/chrony.conf /etc/chrony.conf.bak
# Comment out the original NTP servers
sed -i '/^server/ s/^\(.*\)$/# \1/g' /etc/chrony.conf
# Add ceph-admin as the server
sed -i '/^# Please / a\server 192.168.168.200 iburst' /etc/chrony.conf
# Restart chronyd on all nodes to apply the changes
systemctl restart chronyd
# Check synchronization; a '*' on the client means it is in sync
[root@ceph-node1 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address Stratum Poll Reach LastRx Last sample
===============================================================================
^* 192.168.168.200 3 6 17 38 +478ns[ -21us] +/- 21ms
Set up passwordless SSH from the admin node
# Run on ceph-admin
# The installation is done as root, so no dedicated deploy user is created
# Generate a key pair, accepting all defaults
ssh-keygen
# Copy the public key to each node
for i in 1 2 3
do
ssh-copy-id root@ceph-node$i
done
Install ceph-deploy on the admin node
[root@ceph-admin ~]# yum -y install ceph-deploy
[root@ceph-admin ~]# ceph-deploy --version
Traceback (most recent call last):
File "/usr/bin/ceph-deploy", line 18, in <module>
from ceph_deploy.cli import main
File "/usr/lib/python2.7/site-packages/ceph_deploy/cli.py", line 1, in <module>
import pkg_resources
ImportError: No module named pkg_resources
The traceback above means python-setuptools (which provides pkg_resources) is missing; installing python2-pip pulls it in as a dependency:
[root@ceph-admin ~]# yum search python2-pip
已加载插件:fastestmirror
Loading mirror speeds from cached hostfile
* base: mirrors.aliyun.com
* extras: mirrors.aliyun.com
* updates: mirrors.aliyun.com
================================================================= N/S matched: python2-pip =================================================================
python2-pip.noarch : A tool for installing and managing Python 2 packages
名称和简介匹配 only,使用“search all”试试。
[root@ceph-admin ~]# yum install -y python2-pip
已安装:
python2-pip.noarch 0:8.1.2-14.el7
作为依赖被安装:
python-backports.x86_64 0:1.0-8.el7 python-backports-ssl_match_hostname.noarch 0:3.5.0.1-1.el7 python-ipaddress.noarch 0:1.0.16-2.el7
python-setuptools.noarch 0:0.9.8-7.el7
完毕!
[root@ceph-admin ~]# ceph-deploy --version
2.0.1
Deploying the cluster
https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#expanding-your-cluster
Create the cluster
Create the cluster from the admin node
[root@ceph-admin ~]# pwd
/root
[root@ceph-admin ~]# mkdir my-cluster
[root@ceph-admin ~]# cd my-cluster/
[root@ceph-admin my-cluster]#
# Create a new cluster.
# With multiple NICs, set --public-network 192.168.168.0/24 for client-facing traffic
# and --cluster-network 192.168.1.0/24 for internal replication traffic.
# Use ceph-node1 as the monitor node
# ceph-deploy new --public-network 192.168.168.0/24 --cluster-network 192.168.1.0/24 ceph-node1
[root@ceph-admin my-cluster]# ceph-deploy new ceph-node1
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.0.1): /usr/bin/ceph-deploy new ceph-node1
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] func : <function new at 0x7f49f4477de8>
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f49f3bf2518>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] ssh_copykey : True
[ceph_deploy.cli][INFO ] mon : ['ceph-node1']
[ceph_deploy.cli][INFO ] public_network : None
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] cluster_network : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.cli][INFO ] fsid : None
[ceph_deploy.new][DEBUG ] Creating new cluster named ceph
[ceph_deploy.new][INFO ] making sure passwordless SSH succeeds
[ceph-node1][DEBUG ] connected to host: ceph-admin
[ceph-node1][INFO ] Running command: ssh -CT -o BatchMode=yes ceph-node1
[ceph-node1][DEBUG ] connected to host: ceph-node1
[ceph-node1][DEBUG ] detect platform information from remote host
[ceph-node1][DEBUG ] detect machine type
[ceph-node1][DEBUG ] find the location of an executable
[ceph-node1][INFO ] Running command: /usr/sbin/ip link show
[ceph-node1][INFO ] Running command: /usr/sbin/ip addr show
[ceph-node1][DEBUG ] IP addresses found: [u'192.168.168.201']
[ceph_deploy.new][DEBUG ] Resolving host ceph-node1
[ceph_deploy.new][DEBUG ] Monitor ceph-node1 at 192.168.168.201
[ceph_deploy.new][DEBUG ] Monitor initial members are ['ceph-node1']
[ceph_deploy.new][DEBUG ] Monitor addrs are ['192.168.168.201']
[ceph_deploy.new][DEBUG ] Creating a random mon key...
[ceph_deploy.new][DEBUG ] Writing monitor keyring to ceph.mon.keyring...
[ceph_deploy.new][DEBUG ] Writing initial config to ceph.conf...
[root@ceph-admin my-cluster]# ls
ceph.conf ceph-deploy-ceph.log ceph.mon.keyring
[root@ceph-admin my-cluster]# cat ceph.conf
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
[root@ceph-admin my-cluster]# cat ceph.mon.keyring
[mon.]
key = AQCJFldgAAAAABAAP/KUN+lSmq04mFsIkXUzcA==
caps mon = allow *
Install the Ceph packages
Installing with ceph-deploy (not recommended)
[root@ceph-admin my-cluster]# ceph-deploy install ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.0.1): /usr/bin/ceph-deploy install ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] testing : None
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f505e53a830>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] dev_commit : None
[ceph_deploy.cli][INFO ] install_mds : False
[ceph_deploy.cli][INFO ] stable : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] adjust_repos : True
[ceph_deploy.cli][INFO ] func : <function install at 0x7f505f0045f0>
[ceph_deploy.cli][INFO ] install_mgr : False
[ceph_deploy.cli][INFO ] install_all : False
[ceph_deploy.cli][INFO ] repo : False
[ceph_deploy.cli][INFO ] host : ['ceph-node1', 'ceph-node2', 'ceph-node3']
[ceph_deploy.cli][INFO ] install_rgw : False
[ceph_deploy.cli][INFO ] install_tests : False
[ceph_deploy.cli][INFO ] repo_url : None
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] install_osd : False
[ceph_deploy.cli][INFO ] version_kind : stable
[ceph_deploy.cli][INFO ] install_common : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] dev : master
[ceph_deploy.cli][INFO ] nogpgcheck : False
[ceph_deploy.cli][INFO ] local_mirror : None
[ceph_deploy.cli][INFO ] release : None
[ceph_deploy.cli][INFO ] install_mon : False
[ceph_deploy.cli][INFO ] gpg_url : None
[ceph_deploy.install][DEBUG ] Installing stable version mimic on cluster ceph hosts ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.install][DEBUG ] Detecting platform for host ceph-node1 ...
[ceph-node1][DEBUG ] connected to host: ceph-node1
# ...
[ceph-node1][DEBUG ] 正在检查 /var/tmp/yum-root-7DB79r/ceph-release-1-0.el7.noarch.rpm: ceph-release-1-1.el7.noarch
[ceph-node1][DEBUG ] /var/tmp/yum-root-7DB79r/ceph-release-1-0.el7.noarch.rpm 将被安装
[ceph-node1][DEBUG ] 正在解决依赖关系
[ceph-node1][DEBUG ] --> 正在检查事务
[ceph-node1][DEBUG ] ---> 软件包 ceph-release.noarch.0.1-1.el7 将被 安装
[ceph-node1][DEBUG ] --> 解决依赖关系完成
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] 依赖关系解决
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ] Package 架构 版本 源 大小
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ] 正在安装:
[ceph-node1][DEBUG ] ceph-release noarch 1-1.el7 /ceph-release-1-0.el7.noarch 535
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] 事务概要
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ] 安装 1 软件包
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] 总计:535
[ceph-node1][DEBUG ] 安装大小:535
[ceph-node1][DEBUG ] Downloading packages:
[ceph-node1][DEBUG ] Running transaction check
[ceph-node1][DEBUG ] Running transaction test
[ceph-node1][DEBUG ] Transaction test succeeded
[ceph-node1][DEBUG ] Running transaction
[ceph-node1][DEBUG ] 正在安装 : ceph-release-1-1.el7.noarch 1/1
[ceph-node1][DEBUG ] 警告:/etc/yum.repos.d/ceph.repo 已建立为 /etc/yum.repos.d/ceph.repo.rpmnew
[ceph-node1][DEBUG ] 验证中 : ceph-release-1-1.el7.noarch 1/1
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] 已安装:
[ceph-node1][DEBUG ] ceph-release.noarch 0:1-1.el7
[ceph-node1][DEBUG ]
[ceph-node1][DEBUG ] 完毕!
[ceph-node1][WARNIN] ensuring that /etc/yum.repos.d/ceph.repo contains a high priority
[ceph_deploy][ERROR ] RuntimeError: NoSectionError: No section: 'ceph'
Workaround: run yum remove ceph-release on the failing node,
then continue from the admin node:
[root@ceph-admin my-cluster]# ceph-deploy install ceph-node1
After installing with ceph-deploy install ceph-node1, checking on the node shows that this is not the expected Nautilus release:
[root@ceph-node1 ~]# ceph -v
ceph version 13.2.10 (564bdc4ae87418a232fc901524470e1a0f76d641) mimic (stable)
Installing Ceph with yum
First check with yum list ceph whether the required version is available. If nothing is returned, verify that the Ceph repo exists and query again; the key point is to have the repo for the desired Ceph release configured.
[root@ceph-node1 ~]# yum list ceph
已加载插件:fastestmirror, priorities
Loading mirror speeds from cached hostfile
* base: mirrors.aliyun.com
* extras: mirrors.aliyun.com
* updates: mirrors.aliyun.com
8 packages excluded due to repository priority protections
可安装的软件包
ceph.x86_64 2:14.2.18-0.el7 ceph-x86_64
Install the Ceph packages on each node according to its role: ceph, ceph-mon, ceph-mgr, ceph-radosgw, ceph-mds. (A batch-install sketch follows.)
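As a convenience, the per-role installs can also be driven from ceph-admin over the passwordless SSH configured earlier. A minimal sketch, assuming the same three node names and that the repos from the previous section are already in place on every node:
# Hypothetical helper, run on ceph-admin: install the base package everywhere,
# then add the monitor package only where a monitor will run (ceph-node1)
for n in ceph-node1 ceph-node2 ceph-node3; do
    ssh root@$n "yum install -y ceph"
done
ssh root@ceph-node1 "yum install -y ceph-mon"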
Start the installation
# node1 also runs a monitor
[root@ceph-node1 ~]# yum install -y ceph ceph-mon
# ...
已安装:
ceph.x86_64 2:14.2.18-0.el7 ceph-mon.x86_64 2:14.2.18-0.el7
作为依赖被安装:
ceph-base.x86_64 2:14.2.18-0.el7 ceph-common.x86_64 2:14.2.18-0.el7 ceph-mds.x86_64 2:14.2.18-0.el7
ceph-mgr.x86_64 2:14.2.18-0.el7 ceph-osd.x86_64 2:14.2.18-0.el7 ceph-selinux.x86_64 2:14.2.18-0.el7
libconfig.x86_64 0:1.4.9-5.el7 librdkafka.x86_64 0:0.11.5-1.el7 libstoragemgmt.x86_64 0:1.8.1-2.el7_9
libstoragemgmt-python.noarch 0:1.8.1-2.el7_9 libstoragemgmt-python-clibs.x86_64 0:1.8.1-2.el7_9 python-enum34.noarch 0:1.0.4-1.el7
yajl.x86_64 0:2.0.4-4.el7
完毕!
# Check the installed version
[root@ceph-node1 ~]# ceph -v
ceph version 14.2.18 (befbc92f3c11eedd8626487211d200c0b44786d9) nautilus (stable)
[root@ceph-node1 ~]# ceph -s
Error initializing cluster client: ObjectNotFound('error calling conf_read_file',)
# node2 and node3 only need the ceph package
[root@ceph-node2 ~]# yum install -y ceph
[root@ceph-node3 ~]# yum install -y ceph
Initialize the monitor from the admin node
ceph-deploy mon create-initial: deploys the monitors defined in "mon initial members", waits for them to form a quorum, then gathers the keys and reports monitor status along the way. If the monitors never reach quorum, the command eventually times out.
# Run on the admin node to initialize the monitor
[root@ceph-admin my-cluster]# ceph-deploy mon create-initial
# ...
[ceph_deploy.gatherkeys][INFO ] Storing ceph.client.admin.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-mds.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-mgr.keyring
[ceph_deploy.gatherkeys][INFO ] keyring 'ceph.mon.keyring' already exists
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-osd.keyring
[ceph_deploy.gatherkeys][INFO ] Storing ceph.bootstrap-rgw.keyring
[ceph_deploy.gatherkeys][INFO ] Destroy temp directory /tmp/tmpogYPsd
[root@ceph-admin my-cluster]# ls
ceph.bootstrap-mds.keyring ceph.bootstrap-osd.keyring ceph.client.admin.keyring ceph-deploy-ceph.log
ceph.bootstrap-mgr.keyring ceph.bootstrap-rgw.keyring ceph.conf ceph.mon.keyring
# Push the config file and admin keyring to each node. Note: 'admin' here refers to the administrator role, not to a particular node
[root@ceph-admin my-cluster]# ceph-deploy admin ceph-node1 ceph-node2 ceph-node3
Check on the monitor node that initialization succeeded
# On node1 (the monitor node), verify the cluster; HEALTH_OK means the cluster is healthy
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 1 daemons, quorum ceph-node1 (age 75s)
mgr: no daemons active
osd: 0 osds: 0 up, 0 in
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 0 B used, 0 B / 0 B avail
pgs:
Deploy the manager daemon (mgr) for monitoring
# Deploy mgr to node1
[root@ceph-admin my-cluster]# ceph-deploy mgr create ceph-node1
Check the cluster status on a node: the mgr has joined the cluster. The HEALTH_WARN here simply means no OSDs have been added yet.
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_WARN
OSD count 0 < osd_pool_default_size 3
services:
mon: 1 daemons, quorum ceph-node1 (age 62m)
mgr: ceph-node1(active, since 4m)
osd: 0 osds: 0 up, 0 in
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 0 B used, 0 B / 0 B avail
pgs:
Add OSDs
Add one disk to each node host; it shows up as /dev/sdb:
[root@ceph-node1 ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 10G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 9G 0 part
├─centos-root 253:0 0 8G 0 lvm /
└─centos-swap 253:1 0 1G 0 lvm [SWAP]
sdb 8:16 0 10G 0 disk
sr0 11:0 1 1024M 0 rom
Add /dev/sdb on the three nodes as OSDs
# Run on the admin node; in production a separate journal/DB device can be added for better performance
# Add the node's /dev/sdb as an OSD
[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node1
# ...
[ceph-node1][INFO ] checking OSD status...
[ceph-node1][DEBUG ] find the location of an executable
[ceph-node1][INFO ] Running command: /bin/ceph --cluster=ceph osd stat --format=json
[ceph_deploy.osd][DEBUG ] Host ceph-node1 is now ready for osd use.
# Check the cluster status on a node: there is now 1 OSD, but because this is fewer than the default pool size of 3, the cluster stays in HEALTH_WARN
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_WARN
OSD count 1 < osd_pool_default_size 3
services:
mon: 1 daemons, quorum ceph-node1 (age 6m)
mgr: ceph-node1(active, since 6m)
osd: 1 osds: 1 up (since 35s), 1 in (since 35s)
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 1.0 GiB used, 9.0 GiB / 10 GiB avail
pgs:
# View the OSDs from a node
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.00980 root default
-3 0.00980 host ceph-node1
0 hdd 0.00980 osd.0 up 1.00000 1.00000
Add the remaining OSDs
[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node2
[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node3
Finally, check all OSDs and the cluster status again
# View the OSDs from a node
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02939 root default
-3 0.00980 host ceph-node1
0 hdd 0.00980 osd.0 up 1.00000 1.00000
-5 0.00980 host ceph-node2
1 hdd 0.00980 osd.1 up 1.00000 1.00000
-7 0.00980 host ceph-node3
2 hdd 0.00980 osd.2 up 1.00000 1.00000
# The cluster is healthy: 3 nodes in total, with 1 monitor, 1 mgr and 3 OSDs
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 1 daemons, quorum ceph-node1 (age 10m)
mgr: ceph-node1(active, since 10m)
osd: 3 osds: 3 up (since 35s), 3 in (since 35s)
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs:
Expanding the cluster
https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#expanding-your-cluster
Add a Ceph Metadata Server to node1, then add Ceph Monitors and Ceph Managers to node2 and node3 to improve reliability and availability.
Add a metadata server
To use CephFS you need at least one metadata server. Run the following to create one:
[root@ceph-admin my-cluster]# ceph-deploy mds create ceph-node1
Check status: mds
# View the mds from a node
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 1 daemons, quorum ceph-node1 (age 48m)
mgr: ceph-node1(active, since 48m)
mds: 1 up:standby
osd: 3 osds: 3 up (since 38m), 3 in (since 38m)
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs:
Add monitor nodes
Monitors are the core of the cluster and use the Paxos algorithm, so an odd number of monitors is normally deployed.
ceph-deploy mon add adds a monitor to an existing cluster: ceph-deploy mon add ceph-node1, or ceph-deploy mon add --address 192.168.168.201 ceph-node1.
Adding a mon fails
# First check the mon status on a node
[root@ceph-node1 ~]# ceph mon stat
e1: 1 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0]}, election epoch 5, leader 0 ceph-node1, quorum 0 ceph-node1
# Run on the admin node: add node2 and node3 as monitors
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node2
# The command fails with:
[ceph-node2][INFO ] Running command: ceph --cluster=ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node2.asok mon_status
[ceph-node2][ERROR ] admin_socket: exception getting command descriptions: [Errno 2] No such file or directory
[ceph-node2][WARNIN] ceph-node2 is not defined in `mon initial members`
[ceph-node2][WARNIN] monitor ceph-node2 does not exist in monmap
[ceph-node2][WARNIN] neither `public_addr` nor `public_network` keys are defined for monitors
[ceph-node2][WARNIN] monitors may not be able to form quorum
[ceph-node2][INFO ] Running command: ceph --cluster=ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node2.asok mon_status
[ceph-node2][ERROR ] admin_socket: exception getting command descriptions: [Errno 2] No such file or directory
[ceph-node2][WARNIN] monitor: mon.ceph-node2, might not be running yet
Fixing the mon problem
According to the docs, public_network has to be defined in ceph.conf, and it must go in the [global] section.
# Edit the config file and add it
[root@ceph-admin my-cluster]# vim ceph.conf
[root@ceph-admin my-cluster]# cat ceph.conf
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public network = 192.168.168.0/24
# Push it to the nodes
[root@ceph-admin my-cluster]# ceph-deploy --overwrite-conf config push ceph-node1 ceph-node2 ceph-node3
The --overwrite-conf flag is required; otherwise the push fails with RuntimeError: config file /etc/ceph/ceph.conf exists with different content; use --overwrite-conf to overwrite.
With the config modified and pushed to the nodes, run the command again
# Run on the admin node: add node2 as a monitor
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node2
# Run on the admin node: add node3 as a monitor
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node3
# A warning still appears: [ceph-node2][WARNIN] ceph-node2 is not defined in `mon initial members`
# To clear it, set mon_initial_members = ceph-node1,ceph-node2,ceph-node3 in ceph.conf and push the file to the nodes again
[root@ceph-admin my-cluster]# cat ceph.conf
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1,ceph-node2,ceph-node3
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public network = 192.168.168.0/24
Check status: mon
View the monitor status
# View the quorum status in JSON format
[root@ceph-node1 ~]# ceph quorum_status --format json-pretty
{
"election_epoch": 14,
"quorum": [
0,
1,
2
],
"quorum_names": [
"ceph-node1",
"ceph-node2",
"ceph-node3"
],
"quorum_leader_name": "ceph-node1",
"quorum_age": 257,
"monmap": {
"epoch": 3,
"fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
"modified": "2021-03-21 22:10:22.992987",
"created": "2021-03-21 18:53:14.599473",
"min_mon_release": 14,
"min_mon_release_name": "nautilus",
"features": {
"persistent": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus"
],
"optional": []
},
"mons": [
{
"rank": 0,
"name": "ceph-node1",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.201:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.201:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.201:6789/0",
"public_addr": "192.168.168.201:6789/0"
},
{
"rank": 1,
"name": "ceph-node2",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.202:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.202:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.202:6789/0",
"public_addr": "192.168.168.202:6789/0"
},
{
"rank": 2,
"name": "ceph-node3",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.203:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.203:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.203:6789/0",
"public_addr": "192.168.168.203:6789/0"
}
]
}
}
# View the mon status
[root@ceph-node1 ~]# ceph mon stat
e3: 3 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0],ceph-node2=[v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0],ceph-node3=[v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0]}, election epoch 14, leader 0 ceph-node1, quorum 0,1,2 ceph-node1,ceph-node2,ceph-node3
# View the detailed mon status (monmap)
[root@ceph-node1 ~]# ceph mon dump
dumped monmap epoch 3
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
# View the cluster status
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 6m)
mgr: ceph-node1(active, since 48m)
mds: 1 up:standby
osd: 3 osds: 3 up (since 53m), 3 in (since 2h)
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs:
Add more managers
Ceph Manager daemons run in an active/standby configuration. Deploying additional manager daemons ensures that if one daemon or host fails, another can take over without interrupting service.
mgr is active/standby by default; only one instance is active at a time.
# Run on the admin node to add more mgrs
[root@ceph-admin my-cluster]# ceph-deploy mgr create ceph-node2 ceph-node3
Check status: mgr
# node1 is active and running; node2 and node3 are standbys
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 7m)
mgr: ceph-node1(active, since 95m), standbys: ceph-node2, ceph-node3
mds: 1 up:standby
osd: 3 osds: 3 up (since 12m), 3 in (since 85m)
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0 B
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs:
Add an object gateway
To use the Ceph Object Gateway component, you must deploy an RGW instance. Run the following to create a new RGW instance:
# First install the package on the node
[root@ceph-node1 ~]# yum list ceph-radosgw
已加载插件:fastestmirror, priorities
Loading mirror speeds from cached hostfile
* base: mirrors.aliyun.com
* extras: mirrors.aliyun.com
* updates: mirrors.aliyun.com
8 packages excluded due to repository priority protections
可安装的软件包
ceph-radosgw.x86_64 2:14.2.18-0.el7 ceph-x86_64
[root@ceph-node1 ~]# yum install -y ceph-radosgw
# Run on the admin node
[root@ceph-admin my-cluster]# ceph-deploy rgw create ceph-node1
# ...
[ceph_deploy.rgw][INFO ] The Ceph Object Gateway (RGW) is now running on host ceph-node1 and default port 7480
# By default the RGW instance listens on port 7480. This can be changed by editing ceph.conf on the node running RGW, for example:
# ========ceph.conf
[client]
rgw frontends = civetweb port=80
# or, to bind an IPv6 address:
[client]
rgw frontends = civetweb port=[::]:80
# ========ceph.conf
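After changing the frontend setting, the gateway daemon has to be restarted for it to take effect. A sketch, assuming the systemd instance name that ceph-deploy typically creates on this host (rgw.ceph-node1); check the actual unit name with systemctl list-units if it differs:
# Run on the node hosting the gateway (assumed unit name)
systemctl restart ceph-radosgw@rgw.ceph-node1
# Confirm the new port is listening
ss -lntp | grep radosgw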
Check status: rgw
View the cluster status
[root@ceph-node1 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 22m)
mgr: ceph-node1(active, since 2h), standbys: ceph-node2, ceph-node3
mds: 1 up:standby
osd: 3 osds: 3 up (since 68m), 3 in (since 2h)
rgw: 1 daemon active (ceph-node1)
task status:
data:
pools: 4 pools, 128 pgs
objects: 187 objects, 1.2 KiB
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs: 128 active+clean
Using the cluster
Install Ceph on the client
[root@centos7 ~]# hostnamectl set-hostname ceph-client
# Change the hostname
[root@ceph-client ~]#
Do the following on ceph-client:
- Configure the EPEL and Ceph repos
- Disable the firewall and SELinux
- Configure time synchronization
Once that is done, install Ceph
[root@ceph-client ~]# yum -y install ceph ceph-radosgw -y
[root@ceph-client ~]# ceph --version
ceph version 14.2.18 (befbc92f3c11eedd8626487211d200c0b44786d9) nautilus (stable)
Set up passwordless SSH from the admin node and push the keyring files to the ceph-client node
# Add the hostname and set up passwordless login
[root@ceph-admin ~]# echo "192.168.168.210 ceph-client" >> /etc/hosts
[root@ceph-admin ~]# ssh-copy-id ceph-client
# Push the admin files to the client
[root@ceph-admin ~]# cd my-cluster/
[root@ceph-admin my-cluster]# ceph-deploy admin ceph-client
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.0.1): /usr/bin/ceph-deploy admin ceph-client
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7efe4c9ae3f8>
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] client : ['ceph-client']
[ceph_deploy.cli][INFO ] func : <function admin at 0x7efe4d4cb230>
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.admin][DEBUG ] Pushing admin keys and conf to ceph-client
[ceph-client][DEBUG ] connected to host: ceph-client
[ceph-client][DEBUG ] detect platform information from remote host
[ceph-client][DEBUG ] detect machine type
[ceph-client][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
At this point the following files have been created on the client:
[root@ceph-client ~]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf rbdmap tmp5AzIry
Using RBD block devices
Create a storage pool
# Create a pool named ceph-demo with the default 3 replicas; the two 64s are pg_num and pgp_num
[root@ceph-node1 ~]# ceph osd pool create ceph-demo 64 64
pool 'ceph-demo' created
# List all pools
[root@ceph-node1 ~]# ceph osd lspools
1 .rgw.root
2 default.rgw.control
3 default.rgw.meta
4 default.rgw.log
5 ceph-demo
# View pool details (pg count, pgp count, replica count, CRUSH rule, etc.); these can be changed with 'ceph osd pool set'
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pg_num
pg_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pgp_num
pgp_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo size
size: 3
[root@ceph-node1 ~]# ceph osd pool get ceph-demo min_size
min_size: 2
[root@ceph-node1 ~]# ceph osd pool get ceph-demo crush_rule
crush_rule: replicated_rule
Create a block device image: rbd create
- Command: rbd create --size {megabytes} {pool-name}/{image-name}
Before a block device can be attached to a node, an image must be created in the cluster, and the pool you specify must already exist.
# In the ceph-demo pool, create a 10G RBD image named rbd-demo.img with only the layering feature enabled
[root@ceph-client ~]# rbd create -p ceph-demo --image rbd-demo.img --size 10G --image-feature layering
# Or
[root@ceph-client ~]# rbd create ceph-demo/rbd-demo1.img --size 10G
# Initialize the pool for RBD use; the pool name follows
[root@ceph-client ~]# rbd pool init ceph-demo
If no pool name is given when creating an image, the default rbd pool is used; that pool must have been created beforehand.
rbd create --size 1024 foo creates a 1 GB image named foo in the default rbd pool.
List block device images: rbd ls
- Command: rbd ls {poolname}
Without a pool name, rbd ls lists the block devices in the rbd pool.
# List all RBD images in the given pool
[root@ceph-client ~]# rbd -p ceph-demo ls
rbd-demo.img
rbd-demo1.img
# Or
[root@ceph-client ~]# rbd ls ceph-demo
[root@ceph-client ~]# rbd list ceph-demo
Retrieve image information: rbd info
- Command: rbd info {image-name}
- Command: rbd info {pool-name}/{image-name}
Without a pool name, rbd info foo retrieves information about the foo image in the rbd pool.
# Show detailed image information
[root@ceph-client ~]# rbd info ceph-demo/rbd-demo.img
rbd image 'rbd-demo.img':
size 10 GiB in 2560 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 233b557767d26
block_name_prefix: rbd_data.233b557767d26
format: 2
features: layering
op_features:
flags:
create_timestamp: Tue Mar 30 23:02:03 2021
access_timestamp: Tue Mar 30 23:02:03 2021
modify_timestamp: Tue Mar 30 23:02:03 2021
[root@ceph-client ~]# rbd info ceph-demo/rbd-demo1.img
rbd image 'rbd-demo1.img':
size 10 GiB in 2560 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 2332d4832d85c
block_name_prefix: rbd_data.2332d4832d85c
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
op_features:
flags:
create_timestamp: Tue Mar 30 23:03:22 2021
access_timestamp: Tue Mar 30 23:03:22 2021
modify_timestamp: Tue Mar 30 23:03:22 2021
Delete a block device image: rbd rm
- Command: rbd rm {image-name}
- Command: rbd rm {pool-name}/{image-name} removes a block device from a pool; replace {image-name} with the image to delete and {pool-name} with the pool name
# Delete the rbd-demo1.img image; equivalently: rbd rm ceph-demo/rbd-demo1.img
[root@ceph-client ~]# rbd rm -p ceph-demo --image rbd-demo1.img
Removing image: 100% complete...done.
Map a block device: rbd map
Use rbd to map an image through the kernel module. You must specify the image name, pool name and user name. If the RBD kernel module is not yet loaded, the rbd command loads it automatically.
- Command: rbd map {pool-name}/{image-name} --id {user-name}
- Command: rbd map {pool-name}/{image-name} --id {user-name} --keyring /path/to/keyring. With cephx authentication enabled you must also supply the key, either via a keyring or a secret file (a concrete sketch follows the list).
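For example, a mapping that names the user and keyring explicitly might look like the following sketch; the paths are the defaults pushed earlier by ceph-deploy admin:
# Map using an explicit user and keyring (equivalent to the defaults used below)
rbd map ceph-demo/rbd-demo.img --id admin --keyring /etc/ceph/ceph.client.admin.keyring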
# Map the image; if it is mapped more than once, the extra mappings can be removed with unmap
[root@ceph-client ~]# rbd -p ceph-demo ls
rbd-demo.img
[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
/dev/rbd0
Unmap a block device: rbd unmap
- Command: rbd unmap /dev/rbd/{poolname}/{imagename}; run rbd with the unmap option and the device name.
[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
rbd: warning: image already mapped as /dev/rbd0
/dev/rbd1
[root@ceph-client ~]# rbd device list
id pool namespace image snap device
0 ceph-demo rbd-demo.img - /dev/rbd0
1 ceph-demo rbd-demo.img - /dev/rbd1
# Unmap the block device
[root@ceph-client ~]# rbd unmap /dev/rbd1
Show mapped block devices: rbd showmapped
Use the showmapped option of the rbd command to see which block device images are mapped through the kernel module.
# Show mapping information
[root@ceph-client ~]# rbd showmapped
id pool namespace image snap device
0 ceph-demo rbd-demo.img - /dev/rbd0
# Or
[root@ceph-client ~]# rbd device list
id pool namespace image snap device
0 ceph-demo rbd-demo.img - /dev/rbd0
[root@ceph-client ~]# ls /dev/rbd/
ceph-demo
[root@ceph-client ~]# ls /dev/rbd/ceph-demo/
rbd-demo.img
Format and use the block device
# A new raw device /dev/rbd0 has appeared
[root@ceph-client ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 10G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 9G 0 part
├─centos-root 253:0 0 8G 0 lvm /
└─centos-swap 253:1 0 1G 0 lvm [SWAP]
sr0 11:0 1 1024M 0 rom
rbd0 252:0 0 10G 0 disk
# Format the device, then mount and use it
[root@ceph-client ~]# mkfs.ext4 /dev/rbd0
mke2fs 1.42.9 (28-Dec-2013)
Discarding device blocks: 完成
文件系统标签=
OS type: Linux
块大小=4096 (log=2)
分块大小=4096 (log=2)
Stride=1024 blocks, Stripe width=1024 blocks
655360 inodes, 2621440 blocks
131072 blocks (5.00%) reserved for the super user
第一个数据块=0
Maximum filesystem blocks=2151677952
80 block groups
32768 blocks per group, 32768 fragments per group
8192 inodes per group
Superblock backups stored on blocks:
32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632
Allocating group tables: 完成
正在写入inode表: 完成
Creating journal (32768 blocks): 完成
Writing superblocks and filesystem accounting information: 完成
# Mount it
[root@ceph-client ~]# mkdir /mnt/rbd-demo
[root@ceph-client ~]# mount /dev/rbd0 /mnt/rbd-demo/
[root@ceph-client ~]# df -hT
文件系统 类型 容量 已用 可用 已用% 挂载点
devtmpfs devtmpfs 561M 0 561M 0% /dev
tmpfs tmpfs 573M 0 573M 0% /dev/shm
tmpfs tmpfs 573M 8.7M 564M 2% /run
tmpfs tmpfs 573M 0 573M 0% /sys/fs/cgroup
/dev/mapper/centos-root xfs 8.0G 1.9G 6.2G 23% /
/dev/sda1 xfs 1014M 163M 852M 16% /boot
tmpfs tmpfs 115M 0 115M 0% /run/user/0
/dev/rbd0 ext4 9.8G 37M 9.2G 1% /mnt/rbd-demo
Resize a block device image: rbd resize
Ceph block device images are thin-provisioned: they only consume physical space once you start writing data. They do, however, have a maximum size, set with the --size option. To increase (or decrease) the maximum size of an image, run:
- Command: rbd resize ceph-demo/rbd-demo.img --size 15G grows the image;
- Command: rbd resize ceph-demo/rbd-demo.img --size 8G --allow-shrink shrinks it.
# Grow the image
[root@ceph-client ~]# rbd resize ceph-demo/rbd-demo.img --size 15G
Resizing image: 100% complete...done.
[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0 9.8G 37M 9.2G 1% /mnt/rbd-demo
# Grow the filesystem so the new size shows up correctly
[root@ceph-client ~]# resize2fs /dev/rbd0
resize2fs 1.42.9 (28-Dec-2013)
Filesystem at /dev/rbd0 is mounted on /mnt/rbd-demo; on-line resizing required
old_desc_blocks = 2, new_desc_blocks = 2
The filesystem on /dev/rbd0 is now 3932160 blocks long.
# Verify that the resize succeeded
[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0 15G 41M 14G 1% /mnt/rbd-demo
Re-mapping after a client reboot
After a reboot the mapping and the mount are gone and have to be redone by hand; a sketch for automating this with rbdmap follows the commands below.
[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
/dev/rbd0
[root@ceph-client ~]# mount /dev/rbd0 /mnt/rbd-demo/
[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0 15G 41M 14G 1% /mnt/rbd-demo
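To make the mapping and mount come back automatically at boot, the rbdmap helper shipped with ceph-common can be used. A minimal sketch, assuming the default admin keyring path and the rbdmap systemd unit provided by the package; adjust names to your environment:
# /etc/ceph/rbdmap lists one image per line together with its credentials (assumed entry)
echo "ceph-demo/rbd-demo.img id=admin,keyring=/etc/ceph/ceph.client.admin.keyring" >> /etc/ceph/rbdmap
# Mount via fstab using the predictable /dev/rbd/<pool>/<image> path; noauto lets rbdmap drive the mount
echo "/dev/rbd/ceph-demo/rbd-demo.img /mnt/rbd-demo ext4 noauto,_netdev 0 0" >> /etc/fstab
# Enable the unit that maps/unmaps everything listed in /etc/ceph/rbdmap at boot/shutdown
systemctl enable rbdmap.service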
Using the CephFS filesystem
A Ceph filesystem requires at least one Ceph metadata server (mds) in the cluster; one was installed earlier.
[root@ceph-node3 ~]# ceph mds stat
1 up:standby
Create the storage pools
Deleting a previously created pool
A pool was created but did not turn out as intended, so use it to test deletion
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 128
Error ERANGE: pg_num 128 size 3 would mean 960 total pgs, which exceeds max 750 (mon_max_pg_per_osd 250 * num_in_osds 3)
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 64
Error ERANGE: pg_num 64 size 3 would mean 768 total pgs, which exceeds max 750 (mon_max_pg_per_osd 250 * num_in_osds 3)
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created
# Delete the pool
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
# The error says the following line has to be added to ceph.conf
[root@ceph-node1 ~]# cat /etc/ceph/ceph.conf | grep delete
mon_allow_pool_delete = true
# Restart the ceph-mon service
[root@ceph-node1 ~]# systemctl restart ceph-mon.target
# Deleting again now succeeds
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
pool 'cephfs_data' removed
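Restarting the monitors is not strictly required; the option can also be applied to the running monitors. A sketch of the runtime alternative (either form should work on Nautilus):
# Inject the option into all running monitors without a restart
ceph tell mon.* injectargs '--mon-allow-pool-delete=true'
# Or persist it in the centralized config store
ceph config set mon mon_allow_pool_delete true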
View/adjust a pool's pg/pgp count
Recalculate before creating the new pools. Assuming 4 pools are planned and there are 3 OSDs with 3 replicas, each pool should get roughly (3 x 100) / 3 / 4 = 25 PGs; since 2^4 = 16 < 25 < 2^5 = 32, round up to the next power of two, so each pool should use 32 PGs (see the sketch below).
Remember: make sure the cluster is healthy before adjusting pg counts.
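The rule of thumb above can be scripted; a small sketch with the numbers of this lab cluster (total PGs roughly equal to OSDs x 100 / replicas, split across the planned pools, rounded up to a power of two):
# Rule-of-thumb PG calculation (values for this lab cluster)
osds=3; replicas=3; pools=4
raw=$(( osds * 100 / replicas / pools ))   # = 25
pg=1; while [ $pg -lt $raw ]; do pg=$(( pg * 2 )); done
echo "suggested pg_num per pool: $pg"      # prints 32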
# List the pools we created ourselves (filter out the default rgw pools)
[root@ceph-node1 ~]# ceph osd lspools | grep -v "\."
5 ceph-demo
# Get the current pg_num and pgp_num; the pool was created with 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pg_num
pg_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pgp_num
pgp_num: 64
# Check the pool's replica count
[root@ceph-node1 ~]# ceph osd dump | grep ceph | grep -i size
pool 5 'ceph-demo' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 84 flags hashpspool,selfmanaged_snaps stripe_width 0 application rbd
# Per the calculation above, pg_num should be 32; set it, provided the cluster is healthy
[root@ceph-node1 ~]# ceph health
HEALTH_OK
[root@ceph-node1 ~]# ceph osd pool set ceph-demo pg_num 32
set pool 5 pg_num to 32
# While pg_num is being adjusted, watch the cluster with ceph -w: it shows detailed status and the data rebalancing. Once the status is back to normal ([INF] overall HEALTH_OK), adjust pgp_num
[root@ceph-node1 ~]# ceph osd pool set ceph-demo pgp_num 32
set pool 5 pgp_num to 32
A Ceph filesystem requires at least two RADOS pools, one for data and one for metadata. When configuring them, consider:
- Using a higher replication level for the metadata pool, because any data loss there can make the whole filesystem unusable (a sketch follows the creation commands below).
- Putting the metadata pool on low-latency storage such as SSDs, because it directly affects the observed latency of client operations.
To create the two pools for the filesystem with default settings, run:
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created
[root@ceph-node1 ~]# ceph osd pool create cephfs_metadata 32
pool 'cephfs_metadata' created
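Per the first consideration above, the metadata pool's replication level can be verified and, if desired, raised once the pool exists. A short sketch; size 3 is already the default in this cluster, so the second command is mainly illustrative:
# Check and, if needed, raise the replica count of the metadata pool
ceph osd pool get cephfs_metadata size
ceph osd pool set cephfs_metadata size 3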
Create the CephFS filesystem: ceph fs new
- Command: ceph fs new <fs_name> <metadata> <data>, where metadata and data are the names of the pools to use.
[root@ceph-node1 ~]# ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 8 and data pool 7
[root@ceph-node1 ~]# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
Once the filesystem is created, the MDS can reach the active state, for example in a single-MDS system:
[root@ceph-node2 ~]# ceph mds stat
cephfs:1 {0=ceph-node1=up:active} # previously this showed: mds: 1 up:standby
Mount the CephFS filesystem
# Mounting directly fails
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/
2021-04-05 13:56:40.777 7f21ba8c3b40 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.guest.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,: (2) No such file or directory
mount error 22 = Invalid argument
# Look at the key
[root@ceph-client ~]# cat /etc/ceph/ceph.client.admin.keyring
[client.admin]
key = AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==
caps mds = "allow *"
caps mgr = "allow *"
caps mon = "allow *"
caps osd = "allow *"
# To mount a Ceph filesystem with cephx authentication enabled, you must supply the user name and key.
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/ -o name=admin,secret=AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==
[root@ceph-client ~]# df -h | grep mnt
192.168.168.201:6789:/ 8.3G 0 8.3G 0% /mnt/mycephfs
# Unmount
[root@ceph-client ~]# umount /mnt/mycephfs/
# The form above leaves the key in the Bash history; it is safer to read the key from a file. First write the key to a file
[root@ceph-client ~]# cat /etc/ceph/ceph.client.admin.keyring | grep key | awk -F " = " '{print $2}' > /etc/ceph/admin.secret
# Mount again
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/ -o name=admin,secretfile=/etc/ceph/admin.secret
[root@ceph-client ~]# df -h | grep mnt
192.168.168.201:6789:/ 8.3G 0 8.3G 0% /mnt/mycephfs
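To make the CephFS mount persist across client reboots, an fstab entry using the same secretfile can be added. A minimal sketch (the _netdev option delays the mount until the network is up; monitor address and paths as used above):
# Append a kernel-client CephFS entry to /etc/fstab (assumed options)
echo "192.168.168.201:6789:/ /mnt/mycephfs ceph name=admin,secretfile=/etc/ceph/admin.secret,_netdev,noatime 0 0" >> /etc/fstab
# Verify that it mounts cleanly
mount -a && df -h | grep mycephfs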
Common commands
Start Ceph daemons (the legacy service syntax; systemd equivalents are sketched below)
# Start a mon daemon
service ceph start mon.ceph-node1
# Start an mds daemon
service ceph start mds.ceph-node1
# Start an osd daemon
service ceph start osd.0
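On a systemd-based release such as Nautilus, the service syntax above is largely superseded by systemd units; a sketch of the equivalent unit names, using the instance ids of this cluster:
# systemd equivalents (run on the node hosting the daemon)
systemctl start ceph-mon@ceph-node1
systemctl start ceph-mds@ceph-node1
systemctl start ceph-osd@0
# Or operate on all daemons of one type, or on everything on the node
systemctl restart ceph-mon.target
systemctl status ceph.target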
View cluster status
- Cluster health: ceph health
[root@ceph-node2 ~]# ceph health
HEALTH_OK
- Cluster status: ceph -s or ceph status
[root@ceph-node2 ~]# ceph -s
cluster:
id: 5f844a3b-1257-4089-bff5-836def275bf0
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 14m)
mgr: ceph-node1(active, since 14m), standbys: ceph-node2, ceph-node3
mds: 1 up:standby
osd: 3 osds: 3 up (since 14m), 3 in (since 46h)
rgw: 1 daemon active (ceph-node1)
task status:
data:
pools: 4 pools, 128 pgs
objects: 187 objects, 1.2 KiB
usage: 3.0 GiB used, 27 GiB / 30 GiB avail
pgs: 128 active+clean
- Live cluster status: ceph -w or watch ceph -s
# Watch the cluster in real time
[root@ceph-node3 ~]# ceph -w
- Health details: ceph health detail
[root@ceph-node2 ~]# ceph health detail
HEALTH_OK
Space utilization
- Storage usage: ceph df
[root@ceph-node2 ~]# ceph df
RAW STORAGE:
CLASS SIZE AVAIL USED RAW USED %RAW USED
hdd 30 GiB 27 GiB 28 MiB 3.0 GiB 10.09
TOTAL 30 GiB 27 GiB 28 MiB 3.0 GiB 10.09
POOLS:
POOL ID PGS STORED OBJECTS USED %USED MAX AVAIL
.rgw.root 1 32 1.2 KiB 4 768 KiB 0 8.5 GiB
default.rgw.control 2 32 0 B 8 0 B 0 8.5 GiB
default.rgw.meta 3 32 0 B 0 0 B 0 8.5 GiB
default.rgw.log 4 32 0 B 175 0 B 0 8.5 GiB
List the authentication keys
[root@ceph-node2 ~]# ceph auth list
installed auth entries:
mds.ceph-node1
key: AQBqQFdgMWhWBBAA3qq0Sc60KRFbY899pSpCxw==
caps: [mds] allow
caps: [mon] allow profile mds
caps: [osd] allow rwx
osd.0
key: AQDPNldgZ1g5LhAAzIlcxSIRfpXxULQKsfPPWA==
caps: [mgr] allow profile osd
caps: [mon] allow profile osd
caps: [osd] allow *
osd.1
key: AQCoN1dgSSeKOBAAohpiupICOFJc4hOtFg93Bg==
caps: [mgr] allow profile osd
caps: [mon] allow profile osd
caps: [osd] allow *
osd.2
key: AQC9N1dgruA9CxAAVxbhFI6QKbrs79h3IINLHw==
caps: [mgr] allow profile osd
caps: [mon] allow profile osd
caps: [osd] allow *
client.admin
key: AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==
caps: [mds] allow *
caps: [mgr] allow *
caps: [mon] allow *
caps: [osd] allow *
client.bootstrap-mds
key: AQCbJVdgGn6dARAAXkUVgCfy4AK9pYT6OZMjlw==
caps: [mon] allow profile bootstrap-mds
client.bootstrap-mgr
key: AQCbJVdg8Y2dARAA/vtYnToOr1dif6jdWIf3aw==
caps: [mon] allow profile bootstrap-mgr
client.bootstrap-osd
key: AQCbJVdgA52dARAAkZhIhd8gJ8ymvO3xNKLu0w==
caps: [mon] allow profile bootstrap-osd
client.bootstrap-rbd
key: AQCbJVdgf6ydARAAjDIiCKrYomzVghM689m9mw==
caps: [mon] allow profile bootstrap-rbd
client.bootstrap-rbd-mirror
key: AQCbJVdgZbydARAA8QvghuiMZ+ZhRCY09HdaCA==
caps: [mon] allow profile bootstrap-rbd-mirror
client.bootstrap-rgw
key: AQCbJVdgFsudARAAR6fUVPws29VuOSFzihXc8g==
caps: [mon] allow profile bootstrap-rgw
client.rgw.ceph-node1
key: AQCtV1dgV6frIRAA1X3p1VDmeDKOcHcXb0C+2g==
caps: [mon] allow rw
caps: [osd] allow rwx
mgr.ceph-node1
key: AQBhM1dgTML+FBAAe6sZ4WaHkCukQ37r4vP44w==
caps: [mds] allow *
caps: [mon] allow profile mgr
caps: [osd] allow *
mgr.ceph-node2
key: AQCtS1dgC72IMRAAvN4Sz3e1wx9HSg52aLzMIg==
caps: [mds] allow *
caps: [mon] allow profile mgr
caps: [osd] allow *
mgr.ceph-node3
key: AQCvS1dgDuo9IxAAB1T1R2naiGYbeddCND3HUQ==
caps: [mds] allow *
caps: [mon] allow profile mgr
caps: [osd] allow *
View cluster configuration
- Detailed daemon configuration: ceph daemon mon.ceph-node1 config show
[root@ceph-node1 ~]# ceph daemon mon.ceph-node1 config show | more
View the log location
- Show where the Ceph log files live: ceph-conf --name mon.ceph-node1 --show-config-value log_file
[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node1 --show-config-value log_file
/var/log/ceph/ceph-mon.ceph-node1.log
[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node2 --show-config-value log_file
/var/log/ceph/ceph-mon.ceph-node2.log
# The log itself can only be read on the machine that produced it
mon
mon status information
- Status: ceph mon stat
[root@ceph-node1 ~]# ceph mon stat
e3: 3 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0],ceph-node2=[v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0],ceph-node3=[v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0]}, election epoch 62, leader 0 ceph-node1, quorum 0,1,2 ceph-node1,ceph-node2,ceph-node3
[root@ceph-node1 ~]# ceph mon stat --format json-pretty
# The output can also be shown in JSON format
{
"epoch": 3,
"min_mon_release_name": "14",
"num_mons": 3,
"leader": "ceph-node1",
"quorum": [
{
"rank": 0,
"name": "ceph-node1"
},
{
"rank": 1,
"name": "ceph-node2"
},
{
"rank": 2,
"name": "ceph-node3"
}
]
}
- View status: ceph mon_status
[root@ceph-node1 ~]# ceph mon_status --format json-pretty
{
"name": "ceph-node2",
"rank": 1,
"state": "peon",
"election_epoch": 62,
"quorum": [
0,
1,
2
],
"quorum_age": 2623,
"features": {
"required_con": "2449958747315912708",
"required_mon": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus"
],
"quorum_con": "4611087854035861503",
"quorum_mon": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus"
]
},
"outside_quorum": [],
"extra_probe_peers": [],
"sync_provider": [],
"monmap": {
"epoch": 3,
"fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
"modified": "2021-03-21 22:10:22.992987",
"created": "2021-03-21 18:53:14.599473",
"min_mon_release": 14,
"min_mon_release_name": "nautilus",
"features": {
"persistent": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus"
],
"optional": []
},
"mons": [
{
"rank": 0,
"name": "ceph-node1",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.201:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.201:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.201:6789/0",
"public_addr": "192.168.168.201:6789/0"
},
{
"rank": 1,
"name": "ceph-node2",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.202:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.202:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.202:6789/0",
"public_addr": "192.168.168.202:6789/0"
},
{
"rank": 2,
"name": "ceph-node3",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.203:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.203:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.203:6789/0",
"public_addr": "192.168.168.203:6789/0"
}
]
},
"feature_map": {
"mon": [
{
"features": "0x3ffddff8ffecffff",
"release": "luminous",
"num": 1
}
],
"osd": [
{
"features": "0x3ffddff8ffecffff",
"release": "luminous",
"num": 1
}
],
"client": [
{
"features": "0x3ffddff8ffecffff",
"release": "luminous",
"num": 2
}
],
"mgr": [
{
"features": "0x3ffddff8ffecffff",
"release": "luminous",
"num": 2
}
]
}
}
- Election/quorum status: ceph quorum_status
[root@ceph-node1 ~]# ceph quorum_status
{"election_epoch":62,"quorum":[0,1,2],"quorum_names":["ceph-node1","ceph-node2","ceph-node3"],"quorum_leader_name":"ceph-node1","quorum_age":2313,"monmap":{"epoch":3,"fsid":"5f844a3b-1257-4089-bff5-836def275bf0","modified":"2021-03-21 22:10:22.992987","created":"2021-03-21 18:53:14.599473","min_mon_release":14,"min_mon_release_name":"nautilus","features":{"persistent":["kraken","luminous","mimic","osdmap-prune","nautilus"],"optional":[]},"mons":[{"rank":0,"name":"ceph-node1","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.201:3300","nonce":0},{"type":"v1","addr":"192.168.168.201:6789","nonce":0}]},"addr":"192.168.168.201:6789/0","public_addr":"192.168.168.201:6789/0"},{"rank":1,"name":"ceph-node2","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.202:3300","nonce":0},{"type":"v1","addr":"192.168.168.202:6789","nonce":0}]},"addr":"192.168.168.202:6789/0","public_addr":"192.168.168.202:6789/0"},{"rank":2,"name":"ceph-node3","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.203:3300","nonce":0},{"type":"v1","addr":"192.168.168.203:6789","nonce":0}]},"addr":"192.168.168.203:6789/0","public_addr":"192.168.168.203:6789/0"}]}}
[root@ceph-node1 ~]# ceph quorum_status --format json-pretty
# JSON output
{
"election_epoch": 62,
"quorum": [
0,
1,
2
],
"quorum_names": [
"ceph-node1",
"ceph-node2",
"ceph-node3"
],
"quorum_leader_name": "ceph-node1",
"quorum_age": 2330,
"monmap": {
"epoch": 3,
"fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
"modified": "2021-03-21 22:10:22.992987",
"created": "2021-03-21 18:53:14.599473",
"min_mon_release": 14,
"min_mon_release_name": "nautilus",
"features": {
"persistent": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus"
],
"optional": []
},
"mons": [
{
"rank": 0,
"name": "ceph-node1",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.201:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.201:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.201:6789/0",
"public_addr": "192.168.168.201:6789/0"
},
{
"rank": 1,
"name": "ceph-node2",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.202:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.202:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.202:6789/0",
"public_addr": "192.168.168.202:6789/0"
},
{
"rank": 2,
"name": "ceph-node3",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "192.168.168.203:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "192.168.168.203:6789",
"nonce": 0
}
]
},
"addr": "192.168.168.203:6789/0",
"public_addr": "192.168.168.203:6789/0"
}
]
}
}
- Monitor map: ceph mon dump
[root@ceph-node1 ~]# ceph mon dump
dumped monmap epoch 3
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
- Detailed mon status: ceph daemon mon.ceph-node1 mon_status
[root@ceph-node1 ~]# ceph daemon mon.ceph-node1 mon_status
# Can only be run on the corresponding node
- Fetch the monmap of the running cluster and save it to mon_map.txt: ceph mon getmap -o mon_map.txt
[root@ceph-node1 ~]# ceph mon getmap -o mon_map.txt
got monmap epoch 3
- Inspect the map fetched above: monmaptool --print mon_map.txt returns the same information as ceph mon dump
[root@ceph-node1 ~]# monmaptool --print mon_map.txt
monmaptool: monmap file mon_map.txt
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
- Show the mon admin socket path
[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node1 --show-config-value admin_socket
/var/run/ceph/ceph-mon.ceph-node1.asok
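The socket path can then be used directly with --admin-daemon; the ceph daemon mon.ceph-node1 ... form shown earlier is a shortcut for the same thing. A quick sketch (run on ceph-node1, where the socket lives):
# List the commands the daemon exposes over its admin socket
ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node1.asok help
# Query the monitor status through the socket
ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node1.asok mon_status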
mon operations
- Remove a mon from the cluster: ceph mon remove ceph-node2
[root@ceph-node2 ~]# ceph mon remove ceph-node2
removing mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0], there will be 2 monitors
# Status changes observed live with ceph -w
2021-03-24 22:05:21.524555 mon.ceph-node3 [INF] mon.ceph-node3 calling monitor election
2021-03-24 22:05:23.499174 mon.ceph-node1 [INF] mon.ceph-node1 calling monitor election
2021-03-24 22:05:23.506510 mon.ceph-node1 [INF] mon.ceph-node1 is new leader, mons ceph-node1,ceph-node3 in quorum (ranks 0,1)
- Add the removed monitor back: ceph mon add <name> <IPaddr[:port]> adds a new monitor named <name> at <addr>
[root@ceph-node2 ~]# ceph mon add ceph-node2 192.168.168.202
adding mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0]
# Status changes shown by ceph -w
2021-03-24 22:07:53.803089 mon.ceph-node1 [WRN] Health check failed: 1/3 mons down, quorum ceph-node1,ceph-node3 (MON_DOWN)
2021-03-24 22:07:53.814976 mon.ceph-node1 [WRN] Health detail: HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3
2021-03-24 22:07:53.815005 mon.ceph-node1 [WRN] MON_DOWN 1/3 mons down, quorum ceph-node1,ceph-node3
2021-03-24 22:07:53.815015 mon.ceph-node1 [WRN] mon.ceph-node2 (rank 2) addr [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] is down (out of quorum)
2021-03-24 22:10:00.002661 mon.ceph-node1 [WRN] overall HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3
# Cluster status
[root@ceph-node1 ~]# ceph health detail
HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3
MON_DOWN 1/3 mons down, quorum ceph-node1,ceph-node3
mon.ceph-node2 (rank 2) addr [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] is down (out of quorum)
[root@ceph-node2 ~]# ceph mon dump
dumped monmap epoch 8
epoch 8
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-24 22:25:27.597283
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
[root@ceph-node2 ~]# ceph mon add ceph-node2 192.168.168.202:6789
adding mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0]
[root@ceph-node2 ~]# ceph mon dump
dumped monmap epoch 9
epoch 9
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-24 22:27:21.087636
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
2: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
mds
mds status information
- View mds status: ceph mds stat
[root@ceph-node1 ~]# ceph mds stat
1 up:standby
- View the mds map: ceph mds dump (deprecated in newer releases; ceph fs dump shows the same information)
ceph mds dump
mds operations
- Remove an mds: ceph mds rm 0 mds.ceph-node1
ceph mds rm 0 mds.ceph-node1
osd
osd status information
- View OSD status: ceph osd stat
[root@ceph-node1 ~]# ceph osd stat
3 osds: 3 up (since 3h), 3 in (since 13d); epoch: e286
- View the OSD map: ceph osd dump
[root@ceph-node1 ~]# ceph osd dump
epoch 286
fsid 5f844a3b-1257-4089-bff5-836def275bf0
created 2021-03-21 18:53:15.026826
modified 2021-04-05 15:58:21.728012
flags sortbitwise,recovery_deletes,purged_snapdirs,pglog_hardlimit
crush_version 7
full_ratio 0.95
backfillfull_ratio 0.9
nearfull_ratio 0.85
require_min_compat_client jewel
min_compat_client jewel
require_osd_release nautilus
pool 1 '.rgw.root' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 18 flags hashpspool stripe_width 0 application rgw
pool 2 'default.rgw.control' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 20 flags hashpspool stripe_width 0 application rgw
pool 3 'default.rgw.meta' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 22 flags hashpspool stripe_width 0 application rgw
pool 4 'default.rgw.log' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 24 flags hashpspool stripe_width 0 application rgw
pool 5 'ceph-demo' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 270 lfor 0/268/266 flags hashpspool,selfmanaged_snaps stripe_width 0 application rbd
removed_snaps [1~3]
pool 7 'cephfs_data' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 286 flags hashpspool,pool_snaps stripe_width 0 application cephfs
pool 8 'cephfs_metadata' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 277 flags hashpspool stripe_width 0 pg_autoscale_bias 4 pg_num_min 16 recovery_priority 5 application cephfs
max_osd 3
osd.0 up in weight 1 up_from 121 up_thru 274 down_at 120 last_clean_interval [108,115) [v2:192.168.168.201:6802/1239,v1:192.168.168.201:6803/1239] [v2:192.168.168.201:6804/1239,v1:192.168.168.201:6805/1239] exists,up 29cd06e6-ebe0-4e3e-b37f-d27c2b29892f
osd.1 up in weight 1 up_from 120 up_thru 274 down_at 116 last_clean_interval [108,115) [v2:192.168.168.202:6800/1243,v1:192.168.168.202:6801/1243] [v2:192.168.168.202:6802/1243,v1:192.168.168.202:6803/1243] exists,up 4bd897e9-a7c5-4732-85d0-e096fb4e9d09
osd.2 up in weight 1 up_from 118 up_thru 274 down_at 117 last_clean_interval [111,115) [v2:192.168.168.203:6800/1182,v1:192.168.168.203:6801/1182] [v2:192.168.168.203:6802/1182,v1:192.168.168.203:6803/1182] exists,up b22b12d0-3751-49bb-ac27-133dd3793886
- View the OSD tree: ceph osd tree
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 0.02939 root default
-3 0.00980 host ceph-node1
0 hdd 0.00980 osd.0 up 1.00000 1.00000
-5 0.00980 host ceph-node2
1 hdd 0.00980 osd.1 up 1.00000 1.00000
-7 0.00980 host ceph-node3
2 hdd 0.00980 osd.2 up 1.00000 1.00000
osd operations
- Mark an OSD down: ceph osd down 0 marks osd.0 down
ceph osd down 0
- Remove an OSD from the cluster: ceph osd rm 0
ceph osd rm 0
- Remove an OSD from the CRUSH map: ceph osd crush rm osd.0
ceph osd crush rm osd.0
- Remove a host bucket from the CRUSH map: ceph osd crush rm ceph-node1
ceph osd crush rm ceph-node1
- Show the maximum OSD count: ceph osd getmaxosd
ceph osd getmaxosd # shows the max_osd value recorded in the OSD map
- Set the maximum OSD count (must be raised when adding more OSDs than the current limit): ceph osd setmaxosd 10
ceph osd setmaxosd 10
- Set an OSD's CRUSH weight: ceph osd crush set {id} {weight} [{loc1} [{loc2} ...]]
ceph osd crush set 3 3.0 host=ceph-node4
ceph osd crush reweight osd.3 1.0
- Set an OSD's reweight: ceph osd reweight 3 0.5
ceph osd reweight 3 0.5
- Mark an OSD out of the cluster: ceph osd out osd.3
ceph osd out osd.3
ceph osd tree # osd.3's reweight becomes 0, so it no longer receives data, but the daemon is still alive
- Bring an out OSD back into the cluster: ceph osd in osd.3
ceph osd in osd.3
ceph osd tree
- Pause OSD I/O (the whole cluster stops accepting reads and writes): ceph osd pause
ceph osd pause
- Resume OSD I/O: ceph osd unpause
ceph osd unpause
- Show the runtime configuration of osd.2: ceph --admin-daemon /var/run/ceph/ceph-osd.2.asok config show | less
ceph --admin-daemon /var/run/ceph/ceph-osd.2.asok config show | less
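When permanently retiring an OSD, the individual commands above are usually combined into a fixed sequence. A sketch of the usual order, here for osd.2 (run the systemctl step on the node that hosts it, and let rebalancing finish after the out step before continuing):
# Typical decommissioning sequence for one OSD (example: osd.2)
ceph osd out osd.2            # stop placing new data on it; triggers rebalancing
systemctl stop ceph-osd@2     # on the node hosting osd.2, stop the daemon
ceph osd crush rm osd.2       # drop it from the CRUSH map
ceph auth del osd.2           # remove its cephx key
ceph osd rm osd.2             # finally remove it from the OSD map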
pool
List the pools
[root@ceph-node1 ~]# ceph osd lspools
1 .rgw.root
2 default.rgw.control
3 default.rgw.meta
4 default.rgw.log
5 ceph-demo
7 cephfs_data
8 cephfs_metadata
Create/delete a pool
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created
Deleting a pool also destroys all of its data, which makes it very dangerous (comparable to rm -rf). Ceph therefore requires the pool name to be entered twice, together with the --yes-i-really-really-mean-it flag.
# Delete the pool
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
# The error says the following line has to be added to ceph.conf
[root@ceph-node1 ~]# cat /etc/ceph/ceph.conf | grep delete
mon_allow_pool_delete = true
# Restart the ceph-mon service
[root@ceph-node1 ~]# systemctl restart ceph-mon.target
# Deleting again now succeeds
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
pool 'cephfs_data' removed
View pool details
[root@ceph-node1 ~]# rados df
POOL_NAME USED OBJECTS CLONES COPIES MISSING_ON_PRIMARY UNFOUND DEGRADED RD_OPS RD WR_OPS WR USED COMPR UNDER COMPR
.rgw.root 768 KiB 4 0 12 0 0 0 33 33 KiB 4 4 KiB 0 B 0 B
ceph-demo 652 MiB 78 0 234 0 0 0 1329 7.5 MiB 258 225 MiB 0 B 0 B
cephfs_data 0 B 0 0 0 0 0 0 0 0 B 0 0 B 0 B 0 B
cephfs_metadata 1.5 MiB 22 0 66 0 0 0 0 0 B 51 19 KiB 0 B 0 B
default.rgw.control 0 B 8 0 24 0 0 0 0 0 B 0 0 B 0 B 0 B
default.rgw.log 0 B 175 0 525 0 0 0 38706 38 MiB 25844 0 B 0 B 0 B
default.rgw.meta 0 B 0 0 0 0 0 0 0 0 B 0 0 B 0 B 0 B
total_objects 287
total_used 3.7 GiB
total_avail 26 GiB
total_space 30 GiB
View pool usage
[root@ceph-node1 ~]# ceph df
RAW STORAGE:
CLASS SIZE AVAIL USED RAW USED %RAW USED
hdd 30 GiB 26 GiB 763 MiB 3.7 GiB 12.49
TOTAL 30 GiB 26 GiB 763 MiB 3.7 GiB 12.49
POOLS:
POOL ID PGS STORED OBJECTS USED %USED MAX AVAIL
.rgw.root 1 32 1.2 KiB 4 768 KiB 0 8.2 GiB
default.rgw.control 2 32 0 B 8 0 B 0 8.2 GiB
default.rgw.meta 3 32 0 B 0 0 B 0 8.2 GiB
default.rgw.log 4 32 0 B 175 0 B 0 8.2 GiB
ceph-demo 5 32 216 MiB 78 652 MiB 2.51 8.2 GiB
cephfs_data 7 32 11 B 1 192 KiB 0 8.2 GiB
cephfs_metadata 8 32 81 KiB 23 1.9 MiB 0 8.2 GiB
Configure pool quotas
Both object-count quotas and byte-size quotas are supported.
# Limit the pool to at most 10 objects
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_objects 10
set-quota max_objects = 10 for pool cephfs_data
# Limit the pool's capacity to 12 GB
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes $((12 * 1024 * 1024 * 1024))
set-quota max_bytes = 12884901888 for pool cephfs_data
# Or limit the stored data to 100 MB
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes 100M
set-quota max_bytes = 104857600 for pool cephfs_data
# View the quotas
[root@ceph-node1 ~]# ceph osd pool get-quota cephfs_data
quotas for pool 'cephfs_data':
max objects: 10 objects
max bytes : 12 GiB
To remove a quota, simply set the corresponding value back to 0.
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_objects 0
set-quota max_objects = 0 for pool cephfs_data
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes 0
set-quota max_bytes = 0 for pool cephfs_data
[root@ceph-node1 ~]# ceph osd pool get-quota cephfs_data
quotas for pool 'cephfs_data':
max objects: N/A
max bytes : N/A
Create/delete pool snapshots
# cephfs_data is the pool name
[root@ceph-node1 ~]# ceph osd pool mksnap cephfs_data cephfs_data_snap_2021.04.05
created pool cephfs_data snap cephfs_data_snap_2021.04.05
[root@ceph-node1 ~]# ceph osd pool rmsnap cephfs_data cephfs_data_snap_2021.04.05
removed pool cephfs_data snap cephfs_data_snap_2021.04.05
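Pool snapshots can be listed with the rados tool; a quick sketch to confirm creation and removal:
# List the snapshots of a pool (empty after the rmsnap above)
rados -p cephfs_data lssnap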
View a pool's pg count: ceph osd pool get <pool> pg_num, as shown earlier.
Reference:
https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#storing-retrieving-object-data
The configuration file explained
ceph.conf
- The config file uses INI syntax; # and ; mark comments. On startup the Ceph cluster loads all conf files in order. The file is split into the following main sections.
global: global settings.
osd: OSD-specific settings; osd.N targets a single OSD, where N is the OSD id, e.g. 0, 1, 2.
mon: monitor-specific settings; mon.A targets a single monitor node, where A is that node's name (ceph-monitor-2, ceph-monitor-1, and so on). Use ceph mon dump to find the names.
client: client-specific settings.
- The configuration can be loaded from several locations, in order; on conflict the most recently loaded value wins. The load order is:
the $CEPH_CONF environment variable
the path given with -c
/etc/ceph/ceph.conf
~/.ceph/ceph.conf
./ceph.conf
- The config file can also use metavariables, such as:
$cluster: the current cluster name.
$type: the current daemon type.
$id: the daemon's identifier.
$host: the hostname the daemon runs on.
$name: expands to $type.$id.
- Annotated ceph.conf parameters
[global] # global settings
fsid = xxxxxxxxxxxxxxx # cluster ID
mon host = 10.0.1.1,10.0.1.2,10.0.1.3 # monitor IP addresses
auth cluster required = cephx # cluster authentication
auth service required = cephx # service authentication
auth client required = cephx # client authentication
osd pool default size = 3 # default number of replicas per pool (default 3)
osd pool default min size = 1 # minimum number of replicas a PG needs to still accept I/O while degraded
public network = 10.0.1.0/24 # public network (the monitor IP subnet)
cluster network = 10.0.2.0/24 # cluster (replication) network
max open files = 131072 # default 0; if set, Ceph raises the system's max open fds
mon initial members = node1, node2, node3 # initial monitors (as given when the monitors were created)
##############################################################
[mon]
mon data = /var/lib/ceph/mon/ceph-$id
mon clock drift allowed = 1 # default 0.05; clock drift allowed between monitors
mon osd min down reporters = 13 # default 1; minimum number of OSDs that must report a peer down
mon osd down out interval = 600 # default 300; seconds Ceph waits before marking a down OSD out
##############################################################
[osd]
osd data = /var/lib/ceph/osd/ceph-$id
osd mkfs type = xfs # filesystem type used when formatting the OSD
osd max write size = 512 # default 90; maximum size of a single OSD write (MB)
osd client message size cap = 2147483648 # default 100; maximum client data allowed in memory (bytes)
osd deep scrub stride = 131072 # default 524288; number of bytes read at a time during deep scrub
osd op threads = 16 # default 2; number of concurrent filesystem operation threads
osd disk threads = 4 # default 1; threads for heavy OSD operations such as recovery and scrubbing
osd map cache size = 1024 # default 500; OSD map cache kept in memory (MB)
osd map cache bl size = 128 # default 50; OSD map cache held in the OSD process (MB)
osd mount options xfs = "rw,noexec,nodev,noatime,nodiratime,nobarrier" # default rw,noatime,inode64; xfs mount options for Ceph OSDs
osd recovery op priority = 2 # default 10; recovery op priority, 1-63, higher values take more resources
osd recovery max active = 10 # default 15; number of recovery requests active at the same time
osd max backfills = 4 # default 10; maximum number of concurrent backfills per OSD
osd min pg log entries = 30000 # default 3000; minimum number of PG log entries kept when trimming
osd max pg log entries = 100000 # default 10000; maximum number of PG log entries kept when trimming
osd mon heartbeat interval = 40 # default 30; interval (seconds) at which an OSD pings a monitor
ms dispatch throttle bytes = 1048576000 # default 104857600; maximum size of messages waiting to be dispatched
objecter inflight ops = 819200 # default 1024; client-side flow control: maximum unsent I/O requests; exceeding it blocks application I/O; 0 means unlimited
osd op log threshold = 50 # default 5; how many ops to log at a time
osd crush chooseleaf type = 0 # default 1; bucket type used when CRUSH rules use chooseleaf
##############################################################
[client]
rbd cache = true # default true; enable RBD caching
rbd cache size = 335544320 # default 33554432; RBD cache size (bytes)
rbd cache max dirty = 134217728 # default 25165824; maximum dirty bytes allowed in write-back mode; 0 means write-through
rbd cache max dirty age = 30 # default 1; how long dirty data may sit in the cache before being flushed (seconds)
rbd cache writethrough until flush = false # default true; compatibility option for virtio drivers older than linux-2.6.32 that never send flush requests
# with it enabled, librbd performs I/O in writethrough mode until the first flush request arrives, then switches to writeback
rbd cache max dirty object = 2 # default 0; maximum number of cached objects, 0 means it is derived from rbd cache size; librbd logically splits an image into 4 MB chunks
# each chunk maps to one Object; librbd manages the cache per Object, so raising this value can improve performance
rbd cache target dirty = 235544320 # default 16777216; amount of dirty data that triggers writeback; must not exceed rbd_cache_max_dirty