Deploying a Ceph Nautilus 14.2.18 cluster on CentOS 7 with ceph-deploy

Hostname     OS          IP                 Installed software
ceph-admin   CentOS7.9   192.168.168.200    ceph-deploy
ceph-node1   CentOS7.9   192.168.168.201    ceph, mon, osd.0, mgr, mds
ceph-node2   CentOS7.9   192.168.168.202    ceph, mon, osd.1
ceph-node3   CentOS7.9   192.168.168.203    ceph, mon, osd.2

Environment preparation



Version information

[root@ceph-admin my-cluster]# cat /proc/version 
Linux version 3.10.0-1062.4.1.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC) ) #1 SMP Fri Oct 18 17:15:30 UTC 2019

[root@ceph-admin my-cluster]# cat /etc/redhat-release 
CentOS Linux release 7.9.2009 (Core)

Set hostnames and /etc/hosts

[root@ceph-admin ~]# cat>>/etc/hosts<<EOF
> 192.168.168.200    ceph-admin
> 192.168.168.201    ceph-node1
> 192.168.168.202    ceph-node2
> 192.168.168.203    ceph-node3
> EOF

[root@ceph-admin ~]# cat /etc/hosts | grep ceph
192.168.168.200    ceph-admin
192.168.168.201    ceph-node1
192.168.168.202    ceph-node2
192.168.168.203    ceph-node3

Configure the EPEL and Ceph repositories (all nodes)

# Configure the Aliyun EPEL repo; if an epel.repo already exists, back it up first
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo

# Configure the Aliyun Ceph repo
cat>/etc/yum.repos.d/ceph.repo<<EOF
[ceph-source]
name=Ceph source packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/SRPMS/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1

[ceph-aarch64]
name=Ceph aarch64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/aarch64/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1

[ceph-noarch]
name=Ceph noarch packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1

[ceph-x86_64]
name=Ceph x86_64 packages
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/
enabled=1
gpgcheck=0
type=rpm-md
gpgkey=https://mirrors.aliyun.com/ceph/keys/release.asc
priority=1
EOF


# Update and check the configured repositories
yum update

# After the update finishes, run the following
yum repolist
# The epel and ceph repositories should appear in the list

# Build the yum cache
yum makecache

Disable the firewall and SELinux (all nodes)

# Stop firewalld and disable it at boot
systemctl stop firewalld
systemctl disable firewalld

# Switch SELinux to permissive now and disable it at boot
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/sysconfig/selinux
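
A quick check that both changes took effect (getenforce should report Permissive now, and Disabled after a reboot):

getenforce
sestatus | grep -i mode
systemctl is-enabled firewalld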

Configure time synchronization (all nodes)

Configure the time service on the admin node

# Set the time zone on all nodes
timedatectl set-timezone Asia/Shanghai

cp /etc/chrony.conf /etc/chrony.conf.bak

# Switch to the Aliyun time servers
sed -i "/^server 0/cserver ntp.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 1/cserver ntp1.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 2/cserver ntp2.aliyun.com iburst" /etc/chrony.conf
sed -i "/^server 3/cserver ntp3.aliyun.com iburst" /etc/chrony.conf

# Add this host itself as a server
sed -i '/^# Please / a\server 127.0.0.1 iburst' /etc/chrony.conf

# Allow clients from the cluster network
sed -i '/#allow / a\allow 192.168.168.0/24' /etc/chrony.conf
# Keep serving time even if no upstream clock source is reachable
sed -i '/^#local / s/^#\(.*\)$/\1/g' /etc/chrony.conf
# Restart chrony to apply the changes
systemctl restart chronyd
# Check synchronization; a "*" on the client side means the source is in sync
chronyc sources

# Resulting configuration on the admin node
[root@ceph-admin ~]# cat /etc/chrony.conf
# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
server 127.0.0.1 iburst
server ntp.aliyun.com iburst
server ntp1.aliyun.com iburst
server ntp2.aliyun.com iburst
server ntp3.aliyun.com iburst

# Record the rate at which the system clock gains/losses time.
driftfile /var/lib/chrony/drift

# Allow the system clock to be stepped in the first three updates
# if its offset is larger than 1 second.
makestep 1.0 3

# Enable kernel synchronization of the real-time clock (RTC).
rtcsync

# Enable hardware timestamping on all interfaces that support it.
#hwtimestamp *

# Increase the minimum number of selectable sources required to adjust
# the system clock.
#minsources 2

# Allow NTP client access from local network.
#allow 192.168.0.0/16
allow 192.168.168.0/24

# Serve time even if not synchronized to a time source.
local stratum 10

# Specify file containing keys for NTP authentication.
#keyfile /etc/chrony.keys

Configure the other nodes

# Set the time zone on all nodes
timedatectl set-timezone Asia/Shanghai

cp /etc/chrony.conf /etc/chrony.conf.bak
# Comment out the original time servers
sed -i '/^server/ s/^\(.*\)$/# \1/g' /etc/chrony.conf
# Add ceph-admin as the time server
sed -i '/^# Please / a\server 192.168.168.200 iburst' /etc/chrony.conf
# Restart chrony on all nodes to apply the changes
systemctl restart chronyd
# Check synchronization; a "*" on the client side means the source is in sync
[root@ceph-node1 ~]# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample               
===============================================================================
^* 192.168.168.200                3   6    17    38   +478ns[  -21us] +/-   21ms

Set up passwordless SSH from the admin node

# Run on ceph-admin
# Installation is done as root, so no dedicated user is created
# Generate a key pair, accepting all defaults
ssh-keygen

# Distribute the public key to each node
for i in 1 2 3
do
    ssh-copy-id root@ceph-node$i
done
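
A quick check that the key distribution worked (each command should print the node's hostname without asking for a password):

for i in 1 2 3
do
    ssh root@ceph-node$i hostname
done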

Install ceph-deploy on the admin node

[root@ceph-admin ~]# yum -y install ceph-deploy

[root@ceph-admin ~]# ceph-deploy --version
Traceback (most recent call last):
  File "/usr/bin/ceph-deploy", line 18, in <module>
    from ceph_deploy.cli import main
  File "/usr/lib/python2.7/site-packages/ceph_deploy/cli.py", line 1, in <module>
    import pkg_resources
ImportError: No module named pkg_resources

The pkg_resources import error means python-setuptools is missing; installing python2-pip pulls it in as a dependency.

[root@ceph-admin ~]# yum search python2-pip
已加载插件:fastestmirror
Loading mirror speeds from cached hostfile
 * base: mirrors.aliyun.com
 * extras: mirrors.aliyun.com
 * updates: mirrors.aliyun.com
================================================================= N/S matched: python2-pip =================================================================
python2-pip.noarch : A tool for installing and managing Python 2 packages

  名称和简介匹配 only,使用“search all”试试。
[root@ceph-admin ~]# yum install -y python2-pip
已安装:
  python2-pip.noarch 0:8.1.2-14.el7                                                                                                                         

作为依赖被安装:
  python-backports.x86_64 0:1.0-8.el7          python-backports-ssl_match_hostname.noarch 0:3.5.0.1-1.el7       python-ipaddress.noarch 0:1.0.16-2.el7      
  python-setuptools.noarch 0:0.9.8-7.el7      

完毕!
[root@ceph-admin ~]# ceph-deploy --version
2.0.1

Deploy the cluster

https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#expanding-your-cluster

Create the cluster



Create the cluster on the admin node

[root@ceph-admin ~]# pwd
/root
[root@ceph-admin ~]# mkdir my-cluster
[root@ceph-admin ~]# cd my-cluster/
[root@ceph-admin my-cluster]#


# Create a new cluster.
# With multiple NICs, specify --public-network 192.168.168.0/24 for client-facing traffic
#                         and --cluster-network 192.168.1.0/24 for internal replication traffic.
# Use ceph-node1 as the monitor node.
# ceph-deploy new --public-network 192.168.168.0/24 --cluster-network 192.168.1.0/24 ceph-node1
[root@ceph-admin my-cluster]# ceph-deploy new ceph-node1
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (2.0.1): /usr/bin/ceph-deploy new ceph-node1
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username                      : None
[ceph_deploy.cli][INFO  ]  func                          : <function new at 0x7f49f4477de8>
[ceph_deploy.cli][INFO  ]  verbose                       : False
[ceph_deploy.cli][INFO  ]  overwrite_conf                : False
[ceph_deploy.cli][INFO  ]  quiet                         : False
[ceph_deploy.cli][INFO  ]  cd_conf                       : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f49f3bf2518>
[ceph_deploy.cli][INFO  ]  cluster                       : ceph
[ceph_deploy.cli][INFO  ]  ssh_copykey                   : True
[ceph_deploy.cli][INFO  ]  mon                           : ['ceph-node1']
[ceph_deploy.cli][INFO  ]  public_network                : None
[ceph_deploy.cli][INFO  ]  ceph_conf                     : None
[ceph_deploy.cli][INFO  ]  cluster_network               : None
[ceph_deploy.cli][INFO  ]  default_release               : False
[ceph_deploy.cli][INFO  ]  fsid                          : None
[ceph_deploy.new][DEBUG ] Creating new cluster named ceph
[ceph_deploy.new][INFO  ] making sure passwordless SSH succeeds
[ceph-node1][DEBUG ] connected to host: ceph-admin 
[ceph-node1][INFO  ] Running command: ssh -CT -o BatchMode=yes ceph-node1
[ceph-node1][DEBUG ] connected to host: ceph-node1 
[ceph-node1][DEBUG ] detect platform information from remote host
[ceph-node1][DEBUG ] detect machine type
[ceph-node1][DEBUG ] find the location of an executable
[ceph-node1][INFO  ] Running command: /usr/sbin/ip link show
[ceph-node1][INFO  ] Running command: /usr/sbin/ip addr show
[ceph-node1][DEBUG ] IP addresses found: [u'192.168.168.201']
[ceph_deploy.new][DEBUG ] Resolving host ceph-node1
[ceph_deploy.new][DEBUG ] Monitor ceph-node1 at 192.168.168.201
[ceph_deploy.new][DEBUG ] Monitor initial members are ['ceph-node1']
[ceph_deploy.new][DEBUG ] Monitor addrs are ['192.168.168.201']
[ceph_deploy.new][DEBUG ] Creating a random mon key...
[ceph_deploy.new][DEBUG ] Writing monitor keyring to ceph.mon.keyring...
[ceph_deploy.new][DEBUG ] Writing initial config to ceph.conf...

[root@ceph-admin my-cluster]# ls
ceph.conf  ceph-deploy-ceph.log  ceph.mon.keyring

[root@ceph-admin my-cluster]# cat ceph.conf 
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

[root@ceph-admin my-cluster]# cat ceph.mon.keyring 
[mon.]
key = AQCJFldgAAAAABAAP/KUN+lSmq04mFsIkXUzcA==
caps mon = allow *

Install the Ceph packages

Installing with ceph-deploy (not recommended)

[root@ceph-admin my-cluster]# ceph-deploy install ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (2.0.1): /usr/bin/ceph-deploy install ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  verbose                       : False
[ceph_deploy.cli][INFO  ]  testing                       : None
[ceph_deploy.cli][INFO  ]  cd_conf                       : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7f505e53a830>
[ceph_deploy.cli][INFO  ]  cluster                       : ceph
[ceph_deploy.cli][INFO  ]  dev_commit                    : None
[ceph_deploy.cli][INFO  ]  install_mds                   : False
[ceph_deploy.cli][INFO  ]  stable                        : None
[ceph_deploy.cli][INFO  ]  default_release               : False
[ceph_deploy.cli][INFO  ]  username                      : None
[ceph_deploy.cli][INFO  ]  adjust_repos                  : True
[ceph_deploy.cli][INFO  ]  func                          : <function install at 0x7f505f0045f0>
[ceph_deploy.cli][INFO  ]  install_mgr                   : False
[ceph_deploy.cli][INFO  ]  install_all                   : False
[ceph_deploy.cli][INFO  ]  repo                          : False
[ceph_deploy.cli][INFO  ]  host                          : ['ceph-node1', 'ceph-node2', 'ceph-node3']
[ceph_deploy.cli][INFO  ]  install_rgw                   : False
[ceph_deploy.cli][INFO  ]  install_tests                 : False
[ceph_deploy.cli][INFO  ]  repo_url                      : None
[ceph_deploy.cli][INFO  ]  ceph_conf                     : None
[ceph_deploy.cli][INFO  ]  install_osd                   : False
[ceph_deploy.cli][INFO  ]  version_kind                  : stable
[ceph_deploy.cli][INFO  ]  install_common                : False
[ceph_deploy.cli][INFO  ]  overwrite_conf                : False
[ceph_deploy.cli][INFO  ]  quiet                         : False
[ceph_deploy.cli][INFO  ]  dev                           : master
[ceph_deploy.cli][INFO  ]  nogpgcheck                    : False
[ceph_deploy.cli][INFO  ]  local_mirror                  : None
[ceph_deploy.cli][INFO  ]  release                       : None
[ceph_deploy.cli][INFO  ]  install_mon                   : False
[ceph_deploy.cli][INFO  ]  gpg_url                       : None
[ceph_deploy.install][DEBUG ] Installing stable version mimic on cluster ceph hosts ceph-node1 ceph-node2 ceph-node3
[ceph_deploy.install][DEBUG ] Detecting platform for host ceph-node1 ...
[ceph-node1][DEBUG ] connected to host: ceph-node1 
# 。。。。。。。
[ceph-node1][DEBUG ] 正在检查 /var/tmp/yum-root-7DB79r/ceph-release-1-0.el7.noarch.rpm: ceph-release-1-1.el7.noarch
[ceph-node1][DEBUG ] /var/tmp/yum-root-7DB79r/ceph-release-1-0.el7.noarch.rpm 将被安装
[ceph-node1][DEBUG ] 正在解决依赖关系
[ceph-node1][DEBUG ] --> 正在检查事务
[ceph-node1][DEBUG ] ---> 软件包 ceph-release.noarch.0.1-1.el7 将被 安装
[ceph-node1][DEBUG ] --> 解决依赖关系完成
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] 依赖关系解决
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ]  Package          架构       版本        源                                大小
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ] 正在安装:
[ceph-node1][DEBUG ]  ceph-release     noarch     1-1.el7     /ceph-release-1-0.el7.noarch     535  
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] 事务概要
[ceph-node1][DEBUG ] ================================================================================
[ceph-node1][DEBUG ] 安装  1 软件包
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] 总计:535  
[ceph-node1][DEBUG ] 安装大小:535  
[ceph-node1][DEBUG ] Downloading packages:
[ceph-node1][DEBUG ] Running transaction check
[ceph-node1][DEBUG ] Running transaction test
[ceph-node1][DEBUG ] Transaction test succeeded
[ceph-node1][DEBUG ] Running transaction
[ceph-node1][DEBUG ]   正在安装    : ceph-release-1-1.el7.noarch                                 1/1 
[ceph-node1][DEBUG ] 警告:/etc/yum.repos.d/ceph.repo 已建立为 /etc/yum.repos.d/ceph.repo.rpmnew 
[ceph-node1][DEBUG ]   验证中      : ceph-release-1-1.el7.noarch                                 1/1 
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] 已安装:
[ceph-node1][DEBUG ]   ceph-release.noarch 0:1-1.el7                                                 
[ceph-node1][DEBUG ] 
[ceph-node1][DEBUG ] 完毕!
[ceph-node1][WARNIN] ensuring that /etc/yum.repos.d/ceph.repo contains a high priority
[ceph_deploy][ERROR ] RuntimeError: NoSectionError: No section: 'ceph'

Workaround: run yum remove ceph-release on the failed node, then continue from the admin node:

[root@ceph-admin my-cluster]# ceph-deploy install ceph-node1

After installing with ceph-deploy install ceph-node1, checking on the node shows mimic rather than the expected nautilus version:

[root@ceph-node1 ~]# ceph -v
ceph version 13.2.10 (564bdc4ae87418a232fc901524470e1a0f76d641) mimic (stable)

Install Ceph with yum

First run yum list ceph to check that the required version is available. If nothing is returned, verify that the Ceph repository exists, then query again. The key point is to have the repository for the desired Ceph version in place.

[root@ceph-node1 ~]# yum list ceph
已加载插件:fastestmirror, priorities
Loading mirror speeds from cached hostfile
 * base: mirrors.aliyun.com
 * extras: mirrors.aliyun.com
 * updates: mirrors.aliyun.com
8 packages excluded due to repository priority protections
可安装的软件包
ceph.x86_64                                                           2:14.2.18-0.el7                                                            ceph-x86_64

Install the Ceph packages on each node according to its role: ceph, ceph-mon, ceph-mgr, ceph-radosgw, ceph-mds.
Start the installation:

# node1 also needs the monitor package
[root@ceph-node1 ~]# yum install -y ceph ceph-mon
# 。。。
已安装:
  ceph.x86_64 2:14.2.18-0.el7                                                ceph-mon.x86_64 2:14.2.18-0.el7                                               

作为依赖被安装:
  ceph-base.x86_64 2:14.2.18-0.el7                    ceph-common.x86_64 2:14.2.18-0.el7                        ceph-mds.x86_64 2:14.2.18-0.el7             
  ceph-mgr.x86_64 2:14.2.18-0.el7                     ceph-osd.x86_64 2:14.2.18-0.el7                           ceph-selinux.x86_64 2:14.2.18-0.el7         
  libconfig.x86_64 0:1.4.9-5.el7                      librdkafka.x86_64 0:0.11.5-1.el7                          libstoragemgmt.x86_64 0:1.8.1-2.el7_9       
  libstoragemgmt-python.noarch 0:1.8.1-2.el7_9        libstoragemgmt-python-clibs.x86_64 0:1.8.1-2.el7_9        python-enum34.noarch 0:1.0.4-1.el7          
  yajl.x86_64 0:2.0.4-4.el7                          

完毕!

# Check the installed version
[root@ceph-node1 ~]# ceph -v
ceph version 14.2.18 (befbc92f3c11eedd8626487211d200c0b44786d9) nautilus (stable)
[root@ceph-node1 ~]# ceph -s
Error initializing cluster client: ObjectNotFound('error calling conf_read_file',)


# node2 and node3 only need the ceph package
[root@ceph-node2 ~]# yum install -y ceph

[root@ceph-node3 ~]# yum install -y ceph

Initialize the monitor from the admin node

ceph-deploy mon create-initial: deploys the monitors defined in mon initial members, waits for them to form a quorum, then gathers the keys and reports the monitor status along the way. If the monitors never form a quorum, the command eventually times out.

# Run on the admin node to initialize the monitor
[root@ceph-admin my-cluster]# ceph-deploy mon create-initial
# 。。。
[ceph_deploy.gatherkeys][INFO  ] Storing ceph.client.admin.keyring
[ceph_deploy.gatherkeys][INFO  ] Storing ceph.bootstrap-mds.keyring
[ceph_deploy.gatherkeys][INFO  ] Storing ceph.bootstrap-mgr.keyring
[ceph_deploy.gatherkeys][INFO  ] keyring 'ceph.mon.keyring' already exists
[ceph_deploy.gatherkeys][INFO  ] Storing ceph.bootstrap-osd.keyring
[ceph_deploy.gatherkeys][INFO  ] Storing ceph.bootstrap-rgw.keyring
[ceph_deploy.gatherkeys][INFO  ] Destroy temp directory /tmp/tmpogYPsd
[root@ceph-admin my-cluster]# ls
ceph.bootstrap-mds.keyring  ceph.bootstrap-osd.keyring  ceph.client.admin.keyring  ceph-deploy-ceph.log
ceph.bootstrap-mgr.keyring  ceph.bootstrap-rgw.keyring  ceph.conf                  ceph.mon.keyring

# Push the config file and admin keyring to each node. Note: "admin" here refers to the administrator role, not to any particular node
[root@ceph-admin my-cluster]# ceph-deploy admin ceph-node1 ceph-node2 ceph-node3

Check on the monitor node that initialization completed

# On node1 (the monitor node), check whether the cluster initialized successfully; HEALTH_OK means the cluster is healthy
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 1 daemons, quorum ceph-node1 (age 75s)
    mgr: no daemons active
    osd: 0 osds: 0 up, 0 in
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:  

Deploy the manager daemon (mgr) for monitoring

# Deploy mgr on node1
[root@ceph-admin my-cluster]# ceph-deploy mgr create ceph-node1

Check the cluster status on a node: the mgr has been added to the cluster. The HEALTH_WARN here just means no OSDs have been added yet.

[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_WARN
            OSD count 0 < osd_pool_default_size 3
 
  services:
    mon: 1 daemons, quorum ceph-node1 (age 62m)
    mgr: ceph-node1(active, since 4m)
    osd: 0 osds: 0 up, 0 in
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:    

Add OSDs

Add a disk to each node host; it shows up as /dev/sdb.

[root@ceph-node1 ~]# lsblk 
NAME            MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
sda               8:0    0   10G  0 disk 
├─sda1            8:1    0    1G  0 part /boot
└─sda2            8:2    0    9G  0 part 
  ├─centos-root 253:0    0    8G  0 lvm  /
  └─centos-swap 253:1    0    1G  0 lvm  [SWAP]
sdb               8:16   0   10G  0 disk 
sr0              11:0    1 1024M  0 rom 

Add /dev/sdb on each of the three nodes as an OSD

# Run on the admin node; in production a journal device can be added to speed things up
# Add the node's /dev/sdb as an OSD
[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node1
# 。。。
[ceph-node1][INFO  ] checking OSD status...
[ceph-node1][DEBUG ] find the location of an executable
[ceph-node1][INFO  ] Running command: /bin/ceph --cluster=ceph osd stat --format=json
[ceph_deploy.osd][DEBUG ] Host ceph-node1 is now ready for osd use.

# Check the cluster status on a node: there is now 1 OSD, but since that is less than the default pool size of 3, the cluster stays in HEALTH_WARN
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_WARN
            OSD count 1 < osd_pool_default_size 3
 
  services:
    mon: 1 daemons, quorum ceph-node1 (age 6m)
    mgr: ceph-node1(active, since 6m)
    osd: 1 osds: 1 up (since 35s), 1 in (since 35s)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   1.0 GiB used, 9.0 GiB / 10 GiB avail
    pgs:  

# Check the OSDs on a node
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF 
-1       0.00980 root default                                
-3       0.00980     host ceph-node1                         
 0   hdd 0.00980         osd.0           up  1.00000 1.00000 

Add the remaining OSDs

[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node2
[root@ceph-admin my-cluster]# ceph-deploy osd create --data /dev/sdb ceph-node3

Finally, check all OSDs and the cluster status again

# Check the OSDs on a node
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF 
-1       0.02939 root default                                
-3       0.00980     host ceph-node1                         
 0   hdd 0.00980         osd.0           up  1.00000 1.00000 
-5       0.00980     host ceph-node2                         
 1   hdd 0.00980         osd.1           up  1.00000 1.00000 
-7       0.00980     host ceph-node3                         
 2   hdd 0.00980         osd.2           up  1.00000 1.00000 

# The cluster is healthy: 3 nodes in total, with 1 monitor, 1 mgr and 3 OSDs
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 1 daemons, quorum ceph-node1 (age 10m)
    mgr: ceph-node1(active, since 10m)
    osd: 3 osds: 3 up (since 35s), 3 in (since 35s)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:    

Expand the cluster

https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#expanding-your-cluster



Add a Ceph Metadata Server on node1, then add Ceph Monitors and Ceph Managers on node2 and node3 to improve reliability and availability.

Add a metadata server

To use CephFS, you need at least one metadata server. Run the following to create one:

[root@ceph-admin my-cluster]# ceph-deploy mds create ceph-node1

Check status: mds

# Check the mds on a node
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 1 daemons, quorum ceph-node1 (age 48m)
    mgr: ceph-node1(active, since 48m)
    mds:  1 up:standby
    osd: 3 osds: 3 up (since 38m), 3 in (since 38m)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:     

Add monitor nodes

Monitors are the core of the cluster and use the Paxos algorithm, so an odd number of them is normally deployed.

ceph-deploy mon add adds a monitor to an existing cluster: ceph-deploy mon add ceph-node1, or ceph-deploy mon add --address 192.168.168.201 ceph-node1.

mon add fails

# First check the mon status on a node
[root@ceph-node1 ~]# ceph mon stat
e1: 1 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0]}, election epoch 5, leader 0 ceph-node1, quorum 0 ceph-node1


# Run on the admin node to add node2 and node3 as monitors
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node2
# The command fails with errors
[ceph-node2][INFO  ] Running command: ceph --cluster=ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node2.asok mon_status
[ceph-node2][ERROR ] admin_socket: exception getting command descriptions: [Errno 2] No such file or directory
[ceph-node2][WARNIN] ceph-node2 is not defined in `mon initial members`
[ceph-node2][WARNIN] monitor ceph-node2 does not exist in monmap
[ceph-node2][WARNIN] neither `public_addr` nor `public_network` keys are defined for monitors
[ceph-node2][WARNIN] monitors may not be able to form quorum
[ceph-node2][INFO  ] Running command: ceph --cluster=ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node2.asok mon_status
[ceph-node2][ERROR ] admin_socket: exception getting command descriptions: [Errno 2] No such file or directory
[ceph-node2][WARNIN] monitor: mon.ceph-node2, might not be running yet

Fixing the mon issue

According to the documentation, public_network has to be defined in ceph.conf, and it must go in the [global] section.

# Edit the config file and add the setting
[root@ceph-admin my-cluster]# vim ceph.conf 
[root@ceph-admin my-cluster]# cat ceph.conf 
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

public network = 192.168.168.0/24

# Push it to the nodes
[root@ceph-admin my-cluster]# ceph-deploy --overwrite-conf config push ceph-node1 ceph-node2 ceph-node3 

The --overwrite-conf option is required; otherwise the push fails with RuntimeError: config file /etc/ceph/ceph.conf exists with different content; use --overwrite-conf to overwrite.

After editing the config file and pushing it to the nodes, run the command again

# Run on the admin node to add node2 as a monitor
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node2
# Run on the admin node to add node3 as a monitor
[root@ceph-admin my-cluster]# ceph-deploy mon add ceph-node3

# There is still a warning: [ceph-node2][WARNIN] ceph-node2 is not defined in `mon initial members`
# To address it, set mon_initial_members = ceph-node1,ceph-node2,ceph-node3 in ceph.conf and push it to the nodes
[root@ceph-admin my-cluster]# cat ceph.conf 
[global]
fsid = 5f844a3b-1257-4089-bff5-836def275bf0
mon_initial_members = ceph-node1,ceph-node2,ceph-node3
mon_host = 192.168.168.201
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx

public network = 192.168.168.0/24

Check status: mon

View the monitor status

# View the quorum status in JSON format
[root@ceph-node1 ~]# ceph quorum_status --format json-pretty
{
    "election_epoch": 14,
    "quorum": [
        0,
        1,
        2
    ],
    "quorum_names": [
        "ceph-node1",
        "ceph-node2",
        "ceph-node3"
    ],
    "quorum_leader_name": "ceph-node1",
    "quorum_age": 257,
    "monmap": {
        "epoch": 3,
        "fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
        "modified": "2021-03-21 22:10:22.992987",
        "created": "2021-03-21 18:53:14.599473",
        "min_mon_release": 14,
        "min_mon_release_name": "nautilus",
        "features": {
            "persistent": [
                "kraken",
                "luminous",
                "mimic",
                "osdmap-prune",
                "nautilus"
            ],
            "optional": []
        },
        "mons": [
            {
                "rank": 0,
                "name": "ceph-node1",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.201:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.201:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.201:6789/0",
                "public_addr": "192.168.168.201:6789/0"
            },
            {
                "rank": 1,
                "name": "ceph-node2",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.202:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.202:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.202:6789/0",
                "public_addr": "192.168.168.202:6789/0"
            },
            {
                "rank": 2,
                "name": "ceph-node3",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.203:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.203:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.203:6789/0",
                "public_addr": "192.168.168.203:6789/0"
            }
        ]
    }
}
# View the mon status
[root@ceph-node1 ~]# ceph mon stat
e3: 3 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0],ceph-node2=[v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0],ceph-node3=[v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0]}, election epoch 14, leader 0 ceph-node1, quorum 0,1,2 ceph-node1,ceph-node2,ceph-node3

# View detailed mon information
[root@ceph-node1 ~]# ceph mon dump 
dumped monmap epoch 3
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3

# View the cluster status
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 6m)
    mgr: ceph-node1(active, since 48m)
    mds:  1 up:standby
    osd: 3 osds: 3 up (since 53m), 3 in (since 2h)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:  

Add managers

The Ceph Manager daemons run in an active/standby configuration. Deploying additional manager daemons ensures that if one daemon or host fails, another can take over without interrupting service.
mgr is active/standby by default, so only one instance is active at a time.

# Run on the admin node to add more mgr daemons
[root@ceph-admin my-cluster]# ceph-deploy mgr create ceph-node2 ceph-node3

Check status: mgr

# node1 is active and running, node2 and node3 are standbys
[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 7m)
    mgr: ceph-node1(active, since 95m), standbys: ceph-node2, ceph-node3
    mds:  1 up:standby
    osd: 3 osds: 3 up (since 12m), 3 in (since 85m)
 
  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:  

Add an object gateway

To use the Ceph Object Gateway component, an RGW instance must be deployed. Run the following to create a new RGW instance:

# First install the package on the node
[root@ceph-node1 ~]# yum list ceph-radosgw
已加载插件:fastestmirror, priorities
Loading mirror speeds from cached hostfile
 * base: mirrors.aliyun.com
 * extras: mirrors.aliyun.com
 * updates: mirrors.aliyun.com
8 packages excluded due to repository priority protections
可安装的软件包
ceph-radosgw.x86_64                                                       2:14.2.18-0.el7                                                        ceph-x86_64
[root@ceph-node1 ~]# yum install -y ceph-radosgw


# Run on the admin node
[root@ceph-admin my-cluster]# ceph-deploy rgw create ceph-node1
# 。。。
[ceph_deploy.rgw][INFO  ] The Ceph Object Gateway (RGW) is now running on host ceph-node1 and default port 7480
# By default the RGW instance listens on port 7480. This can be changed by editing
# ceph.conf on the node running RGW, with either of the following:
# ========ceph.conf
[client]
rgw frontends = civetweb port=80
# or, to bind on IPv6:
[client]
rgw frontends = civetweb port=[::]:80
# ========ceph.conf
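
A minimal sketch of applying that change, assuming the edit is made in the admin node's my-cluster/ceph.conf and that the standard ceph-radosgw.target systemd unit is used on the RGW node:

# On the admin node: push the edited ceph.conf to the RGW node
[root@ceph-admin my-cluster]# ceph-deploy --overwrite-conf config push ceph-node1
# On ceph-node1: restart the RGW daemon so it picks up the new frontend port
[root@ceph-node1 ~]# systemctl restart ceph-radosgw.target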

Check status: rgw

Check the cluster status

[root@ceph-node1 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 22m)
    mgr: ceph-node1(active, since 2h), standbys: ceph-node2, ceph-node3
    mds:  1 up:standby
    osd: 3 osds: 3 up (since 68m), 3 in (since 2h)
    rgw: 1 daemon active (ceph-node1)
 
  task status:
 
  data:
    pools:   4 pools, 128 pgs
    objects: 187 objects, 1.2 KiB
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:     128 active+clean

Use the cluster

Install Ceph on the client

[root@centos7 ~]# hostnamectl set-hostname ceph-client
# The hostname has been changed
[root@ceph-client ~]# 

Do the following configuration on ceph-client (a condensed sketch follows the list):

  1. Configure the EPEL and Ceph repositories
  2. Disable the firewall and SELinux
  3. Configure time synchronization
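
The sketch below condenses those three steps, reusing the repository and chrony settings shown earlier (it assumes the same Aliyun mirrors and ceph-admin at 192.168.168.200 as the time source):

# 1. Repositories: same epel.repo and ceph.repo as on the cluster nodes
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
# copy the same /etc/yum.repos.d/ceph.repo used on the cluster nodes (or recreate it with the heredoc above)
yum makecache

# 2. Firewall and SELinux
systemctl stop firewalld && systemctl disable firewalld
setenforce 0
sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/sysconfig/selinux

# 3. Time synchronization against ceph-admin
timedatectl set-timezone Asia/Shanghai
sed -i '/^server/ s/^\(.*\)$/# \1/g' /etc/chrony.conf
sed -i '/^# Please / a\server 192.168.168.200 iburst' /etc/chrony.conf
systemctl restart chronyd && chronyc sources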

Once that is done, install Ceph:

[root@ceph-client ~]# yum -y install ceph ceph-radosgw
[root@ceph-client ~]# ceph --version
ceph version 14.2.18 (befbc92f3c11eedd8626487211d200c0b44786d9) nautilus (stable)

Set up passwordless SSH from the admin node and push the admin files to ceph-client

# Add the client's hostname and set up passwordless login
[root@ceph-admin ~]# echo "192.168.168.210    ceph-client" >> /etc/hosts
[root@ceph-admin ~]# ssh-copy-id ceph-client

# Push the admin files to the client
[root@ceph-admin ~]# cd my-cluster/
[root@ceph-admin my-cluster]# ceph-deploy admin ceph-client
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (2.0.1): /usr/bin/ceph-deploy admin ceph-client
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username                      : None
[ceph_deploy.cli][INFO  ]  verbose                       : False
[ceph_deploy.cli][INFO  ]  overwrite_conf                : False
[ceph_deploy.cli][INFO  ]  quiet                         : False
[ceph_deploy.cli][INFO  ]  cd_conf                       : <ceph_deploy.conf.cephdeploy.Conf instance at 0x7efe4c9ae3f8>
[ceph_deploy.cli][INFO  ]  cluster                       : ceph
[ceph_deploy.cli][INFO  ]  client                        : ['ceph-client']
[ceph_deploy.cli][INFO  ]  func                          : <function admin at 0x7efe4d4cb230>
[ceph_deploy.cli][INFO  ]  ceph_conf                     : None
[ceph_deploy.cli][INFO  ]  default_release               : False
[ceph_deploy.admin][DEBUG ] Pushing admin keys and conf to ceph-client
[ceph-client][DEBUG ] connected to host: ceph-client 
[ceph-client][DEBUG ] detect platform information from remote host
[ceph-client][DEBUG ] detect machine type
[ceph-client][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf

At this point the following files have been created on the client:

[root@ceph-client ~]# ls /etc/ceph/
ceph.client.admin.keyring  ceph.conf  rbdmap  tmp5AzIry

Using RBD block devices

Create a storage pool

# Create a pool named ceph-demo with the default three replicas; 64 and 64 are pg_num and pgp_num
[root@ceph-node1 ~]# ceph osd pool create ceph-demo 64 64
pool 'ceph-demo' created

# List all pools
[root@ceph-node1 ~]# ceph osd lspools
1 .rgw.root
2 default.rgw.control
3 default.rgw.meta
4 default.rgw.log
5 ceph-demo

# Inspect pool details: pg count, pgp count, replica count, CRUSH rule, etc.; pool settings can be changed with "set"
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pg_num
pg_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pgp_num
pgp_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo size
size: 3
[root@ceph-node1 ~]# ceph osd pool get ceph-demo min_size
min_size: 2
[root@ceph-node1 ~]# ceph osd pool get ceph-demo crush_rule
crush_rule: replicated_rule

Create a block device image: rbd create

  • Command: rbd create --size {megabytes} {pool-name}/{image-name}

Before a block device can be attached to a node, an image must be created in the cluster, and the specified pool must already exist.

# In the ceph-demo pool, create a 10G RBD image named rbd-demo.img with only the layering feature
[root@ceph-client ~]# rbd create -p ceph-demo --image rbd-demo.img --size 10G --image-feature layering
# or
[root@ceph-client ~]# rbd create ceph-demo/rbd-demo1.img --size 10G


# Initialize the pool for use by RBD; the pool name follows
[root@ceph-client ~]# rbd pool init ceph-demo

If no pool name is given when creating an image, the default rbd pool is used, and that pool has to be created beforehand.
rbd create --size 1024 foo creates a 1 GB image named foo in the default rbd pool.

List block device images: rbd ls

  • Command: rbd ls {poolname}

Without a pool name, rbd ls lists the block devices in the rbd pool.

# List all RBD images in the given pool
[root@ceph-client ~]# rbd -p ceph-demo ls
rbd-demo.img
rbd-demo1.img
# or
[root@ceph-client ~]# rbd ls ceph-demo
[root@ceph-client ~]# rbd list ceph-demo

Retrieve image information: rbd info

  • Command: rbd info {image-name}
  • Command: rbd info {pool-name}/{image-name}

Without a pool name, rbd info foo retrieves the information for the foo image in the rbd pool.

# Show detailed image information
[root@ceph-client ~]# rbd info ceph-demo/rbd-demo.img
rbd image 'rbd-demo.img':
	size 10 GiB in 2560 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: 233b557767d26
	block_name_prefix: rbd_data.233b557767d26
	format: 2
	features: layering
	op_features: 
	flags: 
	create_timestamp: Tue Mar 30 23:02:03 2021
	access_timestamp: Tue Mar 30 23:02:03 2021
	modify_timestamp: Tue Mar 30 23:02:03 2021
[root@ceph-client ~]# rbd info ceph-demo/rbd-demo1.img
rbd image 'rbd-demo1.img':
	size 10 GiB in 2560 objects
	order 22 (4 MiB objects)
	snapshot_count: 0
	id: 2332d4832d85c
	block_name_prefix: rbd_data.2332d4832d85c
	format: 2
	features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
	op_features: 
	flags: 
	create_timestamp: Tue Mar 30 23:03:22 2021
	access_timestamp: Tue Mar 30 23:03:22 2021
	modify_timestamp: Tue Mar 30 23:03:22 2021

Remove a block device image: rbd rm

  • Command: rbd rm {image-name}
  • Command: rbd rm {pool-name}/{image-name}, removes a block device from a pool; replace {image-name} with the image to delete and {pool-name} with its pool
# Delete the rbd-demo1.img image; equivalently: rbd rm ceph-demo/rbd-demo1.img
[root@ceph-client ~]# rbd rm -p ceph-demo --image rbd-demo1.img
Removing image: 100% complete...done.

Map a block device: rbd map

Use rbd to map an image to a kernel block device. The image name, pool name, and user name must be specified. If the RBD kernel module is not loaded yet, the rbd command loads it automatically.

  • Command: rbd map {pool-name}/{image-name} --id {user-name}
  • Command: rbd map {pool-name}/{image-name} --id {user-name} --keyring /path/to/keyring; with cephx authentication enabled the key must also be provided, via a keyring or a secret file (see the example below)
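
For illustration, the mapping used in this walkthrough could also be written in that explicit form, pointing at the admin keyring that was pushed to ceph-client earlier (functionally the same as the shorter rbd map call below):

rbd map ceph-demo/rbd-demo.img --id admin --keyring /etc/ceph/ceph.client.admin.keyring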
# Map the image; if this is run more than once, the extra mappings can be removed with unmap
[root@ceph-client ~]# rbd -p ceph-demo ls
rbd-demo.img
[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
/dev/rbd0

Unmap a block device: rbd unmap

  • Command: rbd unmap /dev/rbd/{poolname}/{imagename}; use the rbd command with the unmap option and the device name.
[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
rbd: warning: image already mapped as /dev/rbd0
/dev/rbd1
[root@ceph-client ~]# rbd device list
id pool      namespace image        snap device    
0  ceph-demo           rbd-demo.img -    /dev/rbd0 
1  ceph-demo           rbd-demo.img -    /dev/rbd1 

# Unmap the block device
[root@ceph-client ~]# rbd unmap /dev/rbd1

Show mapped block devices: rbd showmapped

The showmapped option of the rbd command shows which block device images are mapped through the kernel module.

# Show mapped RBD devices
[root@ceph-client ~]# rbd showmapped
id pool      namespace image        snap device    
0  ceph-demo           rbd-demo.img -    /dev/rbd0 
# or
[root@ceph-client ~]# rbd device list
id pool      namespace image        snap device    
0  ceph-demo           rbd-demo.img -    /dev/rbd0


[root@ceph-client ~]# ls /dev/rbd/
ceph-demo
[root@ceph-client ~]# ls /dev/rbd/ceph-demo/
rbd-demo.img

Format and use the block device

# A new raw device, /dev/rbd0, has appeared
[root@ceph-client ~]# lsblk 
NAME            MAJ:MIN RM  SIZE RO TYPE MOUNTPOINT
sda               8:0    0   10G  0 disk 
├─sda1            8:1    0    1G  0 part /boot
└─sda2            8:2    0    9G  0 part 
  ├─centos-root 253:0    0    8G  0 lvm  /
  └─centos-swap 253:1    0    1G  0 lvm  [SWAP]
sr0              11:0    1 1024M  0 rom  
rbd0            252:0    0   10G  0 disk


# Format the device, then mount and use it
[root@ceph-client ~]# mkfs.ext4 /dev/rbd0
mke2fs 1.42.9 (28-Dec-2013)
Discarding device blocks: 完成                            
文件系统标签=
OS type: Linux
块大小=4096 (log=2)
分块大小=4096 (log=2)
Stride=1024 blocks, Stripe width=1024 blocks
655360 inodes, 2621440 blocks
131072 blocks (5.00%) reserved for the super user
第一个数据块=0
Maximum filesystem blocks=2151677952
80 block groups
32768 blocks per group, 32768 fragments per group
8192 inodes per group
Superblock backups stored on blocks: 
	32768, 98304, 163840, 229376, 294912, 819200, 884736, 1605632

Allocating group tables: 完成                            
正在写入inode表: 完成                            
Creating journal (32768 blocks): 完成
Writing superblocks and filesystem accounting information: 完成


# Mount it
[root@ceph-client ~]# mkdir /mnt/rbd-demo
[root@ceph-client ~]# mount /dev/rbd0 /mnt/rbd-demo/
[root@ceph-client ~]# df -hT
文件系统                类型      容量  已用  可用 已用% 挂载点
devtmpfs                devtmpfs  561M     0  561M    0% /dev
tmpfs                   tmpfs     573M     0  573M    0% /dev/shm
tmpfs                   tmpfs     573M  8.7M  564M    2% /run
tmpfs                   tmpfs     573M     0  573M    0% /sys/fs/cgroup
/dev/mapper/centos-root xfs       8.0G  1.9G  6.2G   23% /
/dev/sda1               xfs      1014M  163M  852M   16% /boot
tmpfs                   tmpfs     115M     0  115M    0% /run/user/0
/dev/rbd0               ext4      9.8G   37M  9.2G    1% /mnt/rbd-demo

Resize a block device image: rbd resize

Ceph block device images are thin-provisioned: they only consume physical space once you start writing data. They do have a maximum capacity, which is the --size you set. To increase (or decrease) the maximum size of a Ceph block device image, run the following:

  • Command: rbd resize ceph-demo/rbd-demo.img --size 15G grows the image;
  • Command: rbd resize ceph-demo/rbd-demo.img --size 8G --allow-shrink shrinks it.
# Grow the rbd image
[root@ceph-client ~]# rbd resize ceph-demo/rbd-demo.img --size 15G
Resizing image: 100% complete...done.

[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0                9.8G   37M  9.2G    1% /mnt/rbd-demo

# Grow the filesystem so the new size becomes visible
[root@ceph-client ~]# resize2fs /dev/rbd0
resize2fs 1.42.9 (28-Dec-2013)
Filesystem at /dev/rbd0 is mounted on /mnt/rbd-demo; on-line resizing required
old_desc_blocks = 2, new_desc_blocks = 2
The filesystem on /dev/rbd0 is now 3932160 blocks long.

# Verify that the expansion succeeded
[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0                 15G   41M   14G    1% /mnt/rbd-demo

Re-mapping after a client reboot

The RBD kernel mapping does not survive a reboot, so after rebooting the image has to be mapped and mounted again (the rbdmap sketch after this example shows one way to automate it):

[root@ceph-client ~]# rbd map ceph-demo/rbd-demo.img
/dev/rbd0
[root@ceph-client ~]# mount /dev/rbd0 /mnt/rbd-demo/
[root@ceph-client ~]# df -h | grep rbd
/dev/rbd0                 15G   41M   14G    1% /mnt/rbd-demo
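
To make the mapping and mount come back automatically at boot, one option (not part of the original walkthrough) is the rbdmap helper shipped with ceph-common; the /etc/ceph/rbdmap file was already listed among the client files above. A hedged sketch, with the noauto fstab option following the rbdmap man page convention where the rbdmap service performs the mount after mapping:

# /etc/ceph/rbdmap: one image per line, with the cephx user and keyring to use
cat >> /etc/ceph/rbdmap <<EOF
ceph-demo/rbd-demo.img id=admin,keyring=/etc/ceph/ceph.client.admin.keyring
EOF

# Map the listed images at boot
systemctl enable rbdmap.service

# fstab entry; noauto keeps systemd's fstab pass from mounting before the device exists
echo "/dev/rbd/ceph-demo/rbd-demo.img /mnt/rbd-demo ext4 noauto,noatime 0 0" >> /etc/fstab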

Using the CephFS file system

A Ceph file system requires at least one Ceph Metadata Server (mds) in the storage cluster; one was installed earlier.

[root@ceph-node3 ~]# ceph mds stat
 1 up:standby

Create the storage pools

Delete a previously created pool

A pool was created, but the result was not as expected, so test deleting it:

[root@ceph-node1 ~]# ceph osd pool create cephfs_data 128
Error ERANGE:  pg_num 128 size 3 would mean 960 total pgs, which exceeds max 750 (mon_max_pg_per_osd 250 * num_in_osds 3)
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 64
Error ERANGE:  pg_num 64 size 3 would mean 768 total pgs, which exceeds max 750 (mon_max_pg_per_osd 250 * num_in_osds 3)
[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created

# Delete the pool
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
# The error says the following line must be added to ceph.conf
[root@ceph-node1 ~]# cat /etc/ceph/ceph.conf  | grep delete
mon_allow_pool_delete = true

# Restart the ceph-mon service
[root@ceph-node1 ~]# systemctl restart ceph-mon.target

# Deleting the pool again now succeeds
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
pool 'cephfs_data' removed
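
As an aside, on Nautilus this switch can usually also be flipped at runtime through the centralized config store, without editing ceph.conf or restarting the monitors; a hedged alternative:

ceph config set mon mon_allow_pool_delete true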

View/adjust a pool's pg/pgp counts

Now recalculate and create the new pools. With 4 planned pools, 3 OSDs and 3 replicas, the usual rule of thumb gives (3 OSDs x 100) / 3 replicas = 100 PGs in total, or 25 per pool; rounding up to the next power of two (2^4 = 16 < 25 < 2^5 = 32) means each pool should get 32 PGs.
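
A small sketch of that rule-of-thumb calculation (the 100-PGs-per-OSD target is the commonly quoted default; the exact target is a planning choice):

#!/bin/bash
# total_pgs ~ (osds * 100) / replica_size, split across the planned pools,
# then rounded up to the next power of two
osds=3 replica_size=3 pools=4
per_pool=$(( osds * 100 / replica_size / pools ))   # 100 / 4 = 25
pow=1; while [ $pow -lt $per_pool ]; do pow=$(( pow * 2 )); done
echo "suggested pg_num per pool: $pow"              # prints 32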

Remember: make sure the cluster is healthy before adjusting PGs.

# List the pools created by hand
[root@ceph-node1 ~]# ceph osd lspools | grep -v "\."
5 ceph-demo

# Get the current pg_num and pgp_num; the pool was created with 64 earlier
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pg_num
pg_num: 64
[root@ceph-node1 ~]# ceph osd pool get ceph-demo pgp_num
pgp_num: 64

# Check the pool's replica count
[root@ceph-node1 ~]# ceph osd dump | grep ceph | grep -i size
pool 5 'ceph-demo' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 64 pgp_num 64 autoscale_mode warn last_change 84 flags hashpspool,selfmanaged_snaps stripe_width 0 application rbd

# Per the calculation above, pg_num should be 32; reset it, provided the cluster is healthy
[root@ceph-node1 ~]# ceph health
HEALTH_OK

[root@ceph-node1 ~]# ceph osd pool set ceph-demo pg_num 32
set pool 5 pg_num to 32
# While the PGs are being adjusted, watch the cluster with ceph -w to see detailed status and the data rebalancing. Once the status is back to normal ([INF] overall HEALTH_OK), adjust pgp_num

[root@ceph-node1 ~]# ceph osd pool set ceph-demo pgp_num 32
set pool 5 pgp_num to 32

A Ceph file system requires at least two RADOS pools, one for data and one for metadata. When configuring these pools, consider:

  • Use a higher replication level for the metadata pool, because any data loss in it can render the whole file system unusable.
  • Use lower-latency storage (such as SSDs) for the metadata pool, because it directly affects client operation latency.

To create the two pools for the file system with default settings, run:

[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created
[root@ceph-node1 ~]# ceph osd pool create cephfs_metadata 32
pool 'cephfs_metadata' created

Create the CephFS file system: ceph fs new

  • Command: ceph fs new <fs_name> <metadata> <data>, where metadata and data are the names of the pools to use.
[root@ceph-node1 ~]# ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 8 and data pool 7

[root@ceph-node1 ~]# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

Once the file system is created, the MDS can reach the active state, for example in a single-MDS system:

[root@ceph-node2 ~]# ceph mds stat
cephfs:1 {0=ceph-node1=up:active}  # previously the status was: mds:  1 up:standby

Mount the CephFS file system

# Mounting directly fails
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/
2021-04-05 13:56:40.777 7f21ba8c3b40 -1 auth: unable to find a keyring on /etc/ceph/ceph.client.guest.keyring,/etc/ceph/ceph.keyring,/etc/ceph/keyring,/etc/ceph/keyring.bin,: (2) No such file or directory
mount error 22 = Invalid argument

# Look at the key contents
[root@ceph-client ~]# cat /etc/ceph/ceph.client.admin.keyring 
[client.admin]
	key = AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==
	caps mds = "allow *"
	caps mgr = "allow *"
	caps mon = "allow *"
	caps osd = "allow *"

# To mount a Ceph file system with cephx authentication enabled, the user name and secret must be supplied.
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/ -o name=admin,secret=AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==

[root@ceph-client ~]# df -h | grep mnt
192.168.168.201:6789:/    8.3G     0  8.3G    0% /mnt/mycephfs

# Unmount
[root@ceph-client ~]# umount /mnt/mycephfs/

# The form above leaves the secret in the Bash history; reading it from a file is safer. First write the key to a file
[root@ceph-client ~]# cat /etc/ceph/ceph.client.admin.keyring | grep key | awk -F " = " '{print $2}' > /etc/ceph/admin.secret

# Mount again
[root@ceph-client ~]# mount -t ceph 192.168.168.201:6789:/ /mnt/mycephfs/ -o name=admin,secretfile=/etc/ceph/admin.secret 

[root@ceph-client ~]# df -h | grep mnt
192.168.168.201:6789:/    8.3G     0  8.3G    0% /mnt/mycephfs
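
To make the CephFS mount persistent across reboots, a hedged /etc/fstab entry using the same secret file could look like this (_netdev defers the mount until the network is up):

192.168.168.201:6789:/  /mnt/mycephfs  ceph  name=admin,secretfile=/etc/ceph/admin.secret,noatime,_netdev  0 0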

Common commands

Start Ceph daemons

# Start the mon daemon
service ceph start mon.ceph-node1

# Start the mds daemon
service ceph start mds.ceph-node1

# Start the osd daemon
service ceph start osd.0
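
On this CentOS 7 / Nautilus deployment the daemons are managed by systemd, so the sysvinit-style service commands above may not be available; the equivalent systemd units would look roughly like this (host names and the OSD id follow this cluster's layout):

systemctl start ceph-mon@ceph-node1
systemctl start ceph-mds@ceph-node1
systemctl start ceph-osd@0

# or act on all daemons of one type through the corresponding target
systemctl restart ceph-mon.target
systemctl status ceph.target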

View cluster status

  • Cluster health: ceph health
[root@ceph-node2 ~]# ceph health
HEALTH_OK
  • Cluster status: ceph -s, ceph status
[root@ceph-node2 ~]# ceph -s
  cluster:
    id:     5f844a3b-1257-4089-bff5-836def275bf0
    health: HEALTH_OK
 
  services:
    mon: 3 daemons, quorum ceph-node1,ceph-node2,ceph-node3 (age 14m)
    mgr: ceph-node1(active, since 14m), standbys: ceph-node2, ceph-node3
    mds:  1 up:standby
    osd: 3 osds: 3 up (since 14m), 3 in (since 46h)
    rgw: 1 daemon active (ceph-node1)
 
  task status:
 
  data:
    pools:   4 pools, 128 pgs
    objects: 187 objects, 1.2 KiB
    usage:   3.0 GiB used, 27 GiB / 30 GiB avail
    pgs:     128 active+clean

  • Live cluster status: ceph -w, watch ceph -s
# Watch the cluster status in real time
[root@ceph-node3 ~]# ceph -w
  • Cluster health details: ceph health detail
[root@ceph-node2 ~]# ceph health detail
HEALTH_OK

Space utilization

  • Storage usage: ceph df
[root@ceph-node2 ~]# ceph df
RAW STORAGE:
    CLASS     SIZE       AVAIL      USED       RAW USED     %RAW USED 
    hdd       30 GiB     27 GiB     28 MiB      3.0 GiB         10.09 
    TOTAL     30 GiB     27 GiB     28 MiB      3.0 GiB         10.09 
 
POOLS:
    POOL                    ID     PGS     STORED      OBJECTS     USED        %USED     MAX AVAIL 
    .rgw.root                1      32     1.2 KiB           4     768 KiB         0       8.5 GiB 
    default.rgw.control      2      32         0 B           8         0 B         0       8.5 GiB 
    default.rgw.meta         3      32         0 B           0         0 B         0       8.5 GiB 
    default.rgw.log          4      32         0 B         175         0 B         0       8.5 GiB 

List the authentication keys

[root@ceph-node2 ~]# ceph auth list
installed auth entries:

mds.ceph-node1
	key: AQBqQFdgMWhWBBAA3qq0Sc60KRFbY899pSpCxw==
	caps: [mds] allow
	caps: [mon] allow profile mds
	caps: [osd] allow rwx
osd.0
	key: AQDPNldgZ1g5LhAAzIlcxSIRfpXxULQKsfPPWA==
	caps: [mgr] allow profile osd
	caps: [mon] allow profile osd
	caps: [osd] allow *
osd.1
	key: AQCoN1dgSSeKOBAAohpiupICOFJc4hOtFg93Bg==
	caps: [mgr] allow profile osd
	caps: [mon] allow profile osd
	caps: [osd] allow *
osd.2
	key: AQC9N1dgruA9CxAAVxbhFI6QKbrs79h3IINLHw==
	caps: [mgr] allow profile osd
	caps: [mon] allow profile osd
	caps: [osd] allow *
client.admin
	key: AQCbJVdg2GmdARAAZITuTKfKWlO4wRLFNkU+rA==
	caps: [mds] allow *
	caps: [mgr] allow *
	caps: [mon] allow *
	caps: [osd] allow *
client.bootstrap-mds
	key: AQCbJVdgGn6dARAAXkUVgCfy4AK9pYT6OZMjlw==
	caps: [mon] allow profile bootstrap-mds
client.bootstrap-mgr
	key: AQCbJVdg8Y2dARAA/vtYnToOr1dif6jdWIf3aw==
	caps: [mon] allow profile bootstrap-mgr
client.bootstrap-osd
	key: AQCbJVdgA52dARAAkZhIhd8gJ8ymvO3xNKLu0w==
	caps: [mon] allow profile bootstrap-osd
client.bootstrap-rbd
	key: AQCbJVdgf6ydARAAjDIiCKrYomzVghM689m9mw==
	caps: [mon] allow profile bootstrap-rbd
client.bootstrap-rbd-mirror
	key: AQCbJVdgZbydARAA8QvghuiMZ+ZhRCY09HdaCA==
	caps: [mon] allow profile bootstrap-rbd-mirror
client.bootstrap-rgw
	key: AQCbJVdgFsudARAAR6fUVPws29VuOSFzihXc8g==
	caps: [mon] allow profile bootstrap-rgw
client.rgw.ceph-node1
	key: AQCtV1dgV6frIRAA1X3p1VDmeDKOcHcXb0C+2g==
	caps: [mon] allow rw
	caps: [osd] allow rwx
mgr.ceph-node1
	key: AQBhM1dgTML+FBAAe6sZ4WaHkCukQ37r4vP44w==
	caps: [mds] allow *
	caps: [mon] allow profile mgr
	caps: [osd] allow *
mgr.ceph-node2
	key: AQCtS1dgC72IMRAAvN4Sz3e1wx9HSg52aLzMIg==
	caps: [mds] allow *
	caps: [mon] allow profile mgr
	caps: [osd] allow *
mgr.ceph-node3
	key: AQCvS1dgDuo9IxAAB1T1R2naiGYbeddCND3HUQ==
	caps: [mds] allow *
	caps: [mon] allow profile mgr
	caps: [osd] allow *

View the cluster configuration

  • Detailed cluster configuration: ceph daemon mon.ceph-node1 config show
[root@ceph-node1 ~]# ceph daemon mon.ceph-node1 config show | more

View the log locations

  • Find the directory holding the Ceph logs: ceph-conf --name mon.ceph-node1 --show-config-value log_file
[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node1 --show-config-value log_file
/var/log/ceph/ceph-mon.ceph-node1.log

[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node2 --show-config-value log_file
/var/log/ceph/ceph-mon.ceph-node2.log

# The logs themselves can only be read on the corresponding host

mon

mon status information

  • Status: ceph mon stat
[root@ceph-node1 ~]# ceph mon stat
e3: 3 mons at {ceph-node1=[v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0],ceph-node2=[v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0],ceph-node3=[v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0]}, election epoch 62, leader 0 ceph-node1, quorum 0,1,2 ceph-node1,ceph-node2,ceph-node3

[root@ceph-node1 ~]# ceph mon stat --format json-pretty
# The output can also be shown in JSON format
{
    "epoch": 3,
    "min_mon_release_name": "14",
    "num_mons": 3,
    "leader": "ceph-node1",
    "quorum": [
        {
            "rank": 0,
            "name": "ceph-node1"
        },
        {
            "rank": 1,
            "name": "ceph-node2"
        },
        {
            "rank": 2,
            "name": "ceph-node3"
        }
    ]
}
  • View status: ceph mon_status
[root@ceph-node1 ~]# ceph mon_status --format json-pretty
{
    "name": "ceph-node2",
    "rank": 1,
    "state": "peon",
    "election_epoch": 62,
    "quorum": [
        0,
        1,
        2
    ],
    "quorum_age": 2623,
    "features": {
        "required_con": "2449958747315912708",
        "required_mon": [
            "kraken",
            "luminous",
            "mimic",
            "osdmap-prune",
            "nautilus"
        ],
        "quorum_con": "4611087854035861503",
        "quorum_mon": [
            "kraken",
            "luminous",
            "mimic",
            "osdmap-prune",
            "nautilus"
        ]
    },
    "outside_quorum": [],
    "extra_probe_peers": [],
    "sync_provider": [],
    "monmap": {
        "epoch": 3,
        "fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
        "modified": "2021-03-21 22:10:22.992987",
        "created": "2021-03-21 18:53:14.599473",
        "min_mon_release": 14,
        "min_mon_release_name": "nautilus",
        "features": {
            "persistent": [
                "kraken",
                "luminous",
                "mimic",
                "osdmap-prune",
                "nautilus"
            ],
            "optional": []
        },
        "mons": [
            {
                "rank": 0,
                "name": "ceph-node1",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.201:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.201:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.201:6789/0",
                "public_addr": "192.168.168.201:6789/0"
            },
            {
                "rank": 1,
                "name": "ceph-node2",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.202:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.202:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.202:6789/0",
                "public_addr": "192.168.168.202:6789/0"
            },
            {
                "rank": 2,
                "name": "ceph-node3",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.203:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.203:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.203:6789/0",
                "public_addr": "192.168.168.203:6789/0"
            }
        ]
    },
    "feature_map": {
        "mon": [
            {
                "features": "0x3ffddff8ffecffff",
                "release": "luminous",
                "num": 1
            }
        ],
        "osd": [
            {
                "features": "0x3ffddff8ffecffff",
                "release": "luminous",
                "num": 1
            }
        ],
        "client": [
            {
                "features": "0x3ffddff8ffecffff",
                "release": "luminous",
                "num": 2
            }
        ],
        "mgr": [
            {
                "features": "0x3ffddff8ffecffff",
                "release": "luminous",
                "num": 2
            }
        ]
    }
}
  • Quorum/election status: ceph quorum_status
[root@ceph-node1 ~]# ceph quorum_status 
{"election_epoch":62,"quorum":[0,1,2],"quorum_names":["ceph-node1","ceph-node2","ceph-node3"],"quorum_leader_name":"ceph-node1","quorum_age":2313,"monmap":{"epoch":3,"fsid":"5f844a3b-1257-4089-bff5-836def275bf0","modified":"2021-03-21 22:10:22.992987","created":"2021-03-21 18:53:14.599473","min_mon_release":14,"min_mon_release_name":"nautilus","features":{"persistent":["kraken","luminous","mimic","osdmap-prune","nautilus"],"optional":[]},"mons":[{"rank":0,"name":"ceph-node1","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.201:3300","nonce":0},{"type":"v1","addr":"192.168.168.201:6789","nonce":0}]},"addr":"192.168.168.201:6789/0","public_addr":"192.168.168.201:6789/0"},{"rank":1,"name":"ceph-node2","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.202:3300","nonce":0},{"type":"v1","addr":"192.168.168.202:6789","nonce":0}]},"addr":"192.168.168.202:6789/0","public_addr":"192.168.168.202:6789/0"},{"rank":2,"name":"ceph-node3","public_addrs":{"addrvec":[{"type":"v2","addr":"192.168.168.203:3300","nonce":0},{"type":"v1","addr":"192.168.168.203:6789","nonce":0}]},"addr":"192.168.168.203:6789/0","public_addr":"192.168.168.203:6789/0"}]}}

[root@ceph-node1 ~]# ceph quorum_status --format json-pretty
# JSON output
{
    "election_epoch": 62,
    "quorum": [
        0,
        1,
        2
    ],
    "quorum_names": [
        "ceph-node1",
        "ceph-node2",
        "ceph-node3"
    ],
    "quorum_leader_name": "ceph-node1",
    "quorum_age": 2330,
    "monmap": {
        "epoch": 3,
        "fsid": "5f844a3b-1257-4089-bff5-836def275bf0",
        "modified": "2021-03-21 22:10:22.992987",
        "created": "2021-03-21 18:53:14.599473",
        "min_mon_release": 14,
        "min_mon_release_name": "nautilus",
        "features": {
            "persistent": [
                "kraken",
                "luminous",
                "mimic",
                "osdmap-prune",
                "nautilus"
            ],
            "optional": []
        },
        "mons": [
            {
                "rank": 0,
                "name": "ceph-node1",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.201:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.201:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.201:6789/0",
                "public_addr": "192.168.168.201:6789/0"
            },
            {
                "rank": 1,
                "name": "ceph-node2",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.202:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.202:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.202:6789/0",
                "public_addr": "192.168.168.202:6789/0"
            },
            {
                "rank": 2,
                "name": "ceph-node3",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "192.168.168.203:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "192.168.168.203:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "192.168.168.203:6789/0",
                "public_addr": "192.168.168.203:6789/0"
            }
        ]
    }
}
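The leader and member names can also be pulled straight out of the JSON; a minimal sketch, assuming the jq tool is installed (it is not part of a minimal CentOS 7 install):

# current quorum leader
ceph quorum_status --format json | jq -r '.quorum_leader_name'
# all monitors currently in quorum
ceph quorum_status --format json | jq -r '.quorum_names[]'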

  • Monitor map: ceph mon dump
[root@ceph-node1 ~]# ceph mon dump
dumped monmap epoch 3
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3

  • View a mon's detailed status: ceph daemon mon.ceph-node1 mon_status
[root@ceph-node1 ~]# ceph daemon mon.ceph-node1 mon_status
# talks to the local admin socket, so it can only be run on the node where that mon daemon lives
  • Fetch the monmap of the running cluster and save it to mon_map.txt: ceph mon getmap -o mon_map.txt
[root@ceph-node1 ~]# ceph mon getmap -o mon_map.txt
got monmap epoch 3
  • Inspect the map fetched above: monmaptool --print mon_map.txt (same information as ceph mon dump)
[root@ceph-node1 ~]# monmaptool --print mon_map.txt 
monmaptool: monmap file mon_map.txt
epoch 3
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-21 22:10:22.992987
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
2: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
  • Find a mon's admin socket path
[root@ceph-node1 ~]# ceph-conf --name mon.ceph-node1 --show-config-value admin_socket
/var/run/ceph/ceph-mon.ceph-node1.asok
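With the socket path known, the monitor can also be queried through the admin socket directly; this is equivalent to the ceph daemon form shown above and, like it, must be run on the node that hosts the daemon (a minimal sketch):

# same result as 'ceph daemon mon.ceph-node1 mon_status'
ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node1.asok mon_status
# list everything the admin socket can answer
ceph --admin-daemon /var/run/ceph/ceph-mon.ceph-node1.asok help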

mon operations

  • Remove a mon from the cluster: ceph mon remove ceph-node2
[root@ceph-node2 ~]# ceph mon remove ceph-node2
removing mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0], there will be 2 monitors

# state changes observed in real time with ceph -w
2021-03-24 22:05:21.524555 mon.ceph-node3 [INF] mon.ceph-node3 calling monitor election
2021-03-24 22:05:23.499174 mon.ceph-node1 [INF] mon.ceph-node1 calling monitor election
2021-03-24 22:05:23.506510 mon.ceph-node1 [INF] mon.ceph-node1 is new leader, mons ceph-node1,ceph-node3 in quorum (ranks 0,1)
  • Add the removed node back: ceph mon add <name> <IPaddr[:port]> adds a new monitor named <name> at <addr> (see the rejoin sketch after the output below)
[root@ceph-node2 ~]# ceph mon add ceph-node2 192.168.168.202
adding mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0]

# state changes shown by ceph -w: the monmap lists ceph-node2 again, but it stays out of quorum (MON_DOWN) until its ceph-mon daemon is running with the new map
2021-03-24 22:07:53.803089 mon.ceph-node1 [WRN] Health check failed: 1/3 mons down, quorum ceph-node1,ceph-node3 (MON_DOWN)
2021-03-24 22:07:53.814976 mon.ceph-node1 [WRN] Health detail: HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3
2021-03-24 22:07:53.815005 mon.ceph-node1 [WRN] MON_DOWN 1/3 mons down, quorum ceph-node1,ceph-node3
2021-03-24 22:07:53.815015 mon.ceph-node1 [WRN]     mon.ceph-node2 (rank 2) addr [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] is down (out of quorum)
2021-03-24 22:10:00.002661 mon.ceph-node1 [WRN] overall HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3

# cluster health while the re-added mon is still out of quorum
[root@ceph-node1 ~]# ceph health detail
HEALTH_WARN 1/3 mons down, quorum ceph-node1,ceph-node3
MON_DOWN 1/3 mons down, quorum ceph-node1,ceph-node3
    mon.ceph-node2 (rank 2) addr [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] is down (out of quorum)


[root@ceph-node2 ~]# ceph mon dump
dumped monmap epoch 8
epoch 8
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-24 22:25:27.597283
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
[root@ceph-node2 ~]# ceph mon add ceph-node2 192.168.168.202:6789
adding mon.ceph-node2 at [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0]
[root@ceph-node2 ~]# ceph mon dump
dumped monmap epoch 9
epoch 9
fsid 5f844a3b-1257-4089-bff5-836def275bf0
last_changed 2021-03-24 22:27:21.087636
created 2021-03-21 18:53:14.599473
min_mon_release 14 (nautilus)
0: [v2:192.168.168.201:3300/0,v1:192.168.168.201:6789/0] mon.ceph-node1
1: [v2:192.168.168.203:3300/0,v1:192.168.168.203:6789/0] mon.ceph-node3
2: [v2:192.168.168.202:3300/0,v1:192.168.168.202:6789/0] mon.ceph-node2
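After ceph mon add the monmap lists ceph-node2 again, but the monitor only rejoins the quorum once its daemon is running with the new map. A minimal rejoin/verify sketch, assuming the standard systemd unit name that ceph-deploy creates (ceph-mon@<hostname>):

# on ceph-node2: (re)start the monitor daemon so it can rejoin the quorum
systemctl restart ceph-mon@ceph-node2
# confirm that all three monitors are back in quorum
ceph quorum_status --format json-pretty | grep -A 4 quorum_names
ceph -s | grep mon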

mds

mds status information

  • View mds status: ceph mds stat
[root@ceph-node1 ~]# ceph mds stat
 1 up:standby
  • View the mds (filesystem) map: ceph mds dump
ceph mds dump 
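On Nautilus, ceph mds dump may be reported as deprecated; the same information is available through the fs subcommands (a minimal sketch):

# dump the FSMap (file systems plus MDS state)
ceph fs dump
# compact overview of file systems, active ranks and standby MDS daemons
ceph fs status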

mds operations

  • Remove an mds daemon: ceph mds rm 0 mds.ceph-node1
ceph mds rm 0 mds.ceph-node1

osd

osd status information

  • View OSD running state: ceph osd stat
[root@ceph-node1 ~]# ceph osd stat
3 osds: 3 up (since 3h), 3 in (since 13d); epoch: e286
  • View the OSD map: ceph osd dump
[root@ceph-node1 ~]# ceph osd dump 
epoch 286
fsid 5f844a3b-1257-4089-bff5-836def275bf0
created 2021-03-21 18:53:15.026826
modified 2021-04-05 15:58:21.728012
flags sortbitwise,recovery_deletes,purged_snapdirs,pglog_hardlimit
crush_version 7
full_ratio 0.95
backfillfull_ratio 0.9
nearfull_ratio 0.85
require_min_compat_client jewel
min_compat_client jewel
require_osd_release nautilus
pool 1 '.rgw.root' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 18 flags hashpspool stripe_width 0 application rgw
pool 2 'default.rgw.control' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 20 flags hashpspool stripe_width 0 application rgw
pool 3 'default.rgw.meta' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 22 flags hashpspool stripe_width 0 application rgw
pool 4 'default.rgw.log' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 24 flags hashpspool stripe_width 0 application rgw
pool 5 'ceph-demo' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 270 lfor 0/268/266 flags hashpspool,selfmanaged_snaps stripe_width 0 application rbd
	removed_snaps [1~3]
pool 7 'cephfs_data' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 286 flags hashpspool,pool_snaps stripe_width 0 application cephfs
pool 8 'cephfs_metadata' replicated size 3 min_size 2 crush_rule 0 object_hash rjenkins pg_num 32 pgp_num 32 autoscale_mode warn last_change 277 flags hashpspool stripe_width 0 pg_autoscale_bias 4 pg_num_min 16 recovery_priority 5 application cephfs
max_osd 3
osd.0 up   in  weight 1 up_from 121 up_thru 274 down_at 120 last_clean_interval [108,115) [v2:192.168.168.201:6802/1239,v1:192.168.168.201:6803/1239] [v2:192.168.168.201:6804/1239,v1:192.168.168.201:6805/1239] exists,up 29cd06e6-ebe0-4e3e-b37f-d27c2b29892f
osd.1 up   in  weight 1 up_from 120 up_thru 274 down_at 116 last_clean_interval [108,115) [v2:192.168.168.202:6800/1243,v1:192.168.168.202:6801/1243] [v2:192.168.168.202:6802/1243,v1:192.168.168.202:6803/1243] exists,up 4bd897e9-a7c5-4732-85d0-e096fb4e9d09
osd.2 up   in  weight 1 up_from 118 up_thru 274 down_at 117 last_clean_interval [111,115) [v2:192.168.168.203:6800/1182,v1:192.168.168.203:6801/1182] [v2:192.168.168.203:6802/1182,v1:192.168.168.203:6803/1182] exists,up b22b12d0-3751-49bb-ac27-133dd3793886
  • View the OSD tree (CRUSH hierarchy): ceph osd tree
[root@ceph-node1 ~]# ceph osd tree
ID CLASS WEIGHT  TYPE NAME           STATUS REWEIGHT PRI-AFF 
-1       0.02939 root default                                
-3       0.00980     host ceph-node1                         
 0   hdd 0.00980         osd.0           up  1.00000 1.00000 
-5       0.00980     host ceph-node2                         
 1   hdd 0.00980         osd.1           up  1.00000 1.00000 
-7       0.00980     host ceph-node3                         
 2   hdd 0.00980         osd.2           up  1.00000 1.00000 

osd operations

  • Mark an OSD down: ceph osd down 0 marks osd.0 down
ceph osd down 0
  • Remove an OSD from the cluster (a complete removal sequence is sketched after this list): ceph osd rm 0
ceph osd rm 0
  • Remove an OSD from the CRUSH map: ceph osd crush rm osd.0
ceph osd crush rm osd.0
  • Remove a host bucket from the CRUSH map: ceph osd crush rm ceph-node1
ceph osd crush rm ceph-node1
  • View the maximum OSD id the cluster map allows: ceph osd getmaxosd
ceph osd getmaxosd    # in this cluster max_osd is 3 (see "max_osd 3" in the ceph osd dump output above); it grows automatically as OSDs are added
  • Set the maximum number of OSDs (must be raised when growing the cluster beyond the current value): ceph osd setmaxosd 10
ceph osd setmaxosd 10
  • Set an OSD's CRUSH weight: ceph osd crush set {id} {weight} [{loc1} [{loc2} ...]]
ceph osd crush set 3 3.0 host=ceph-node4
ceph osd crush reweight osd.3 1.0
  • Set an OSD's reweight (a 0-1 override on top of the CRUSH weight): ceph osd reweight 3 0.5
ceph osd reweight 3 0.5
  • Push an OSD out of the cluster: ceph osd out osd.3
ceph osd out osd.3
ceph osd tree    # osd.3's REWEIGHT drops to 0, so it no longer receives data, but the daemon itself is still running
  • Bring the evicted OSD back in: ceph osd in osd.3
ceph osd in osd.3
ceph osd tree
  • Pause OSD I/O (the whole cluster stops accepting data): ceph osd pause
ceph osd pause
  • Resume OSD I/O (the cluster accepts data again): ceph osd unpause
ceph osd unpause
  • Show the runtime configuration of osd.2 (run on the host where osd.2 lives): ceph --admin-daemon /var/run/ceph/ceph-osd.2.asok config show | less
ceph --admin-daemon /var/run/ceph/ceph-osd.2.asok config show | less
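The individual commands above are usually combined when an OSD is retired for good. A minimal removal sketch for osd.2 on Nautilus (it assumes osd.2 runs on ceph-node3 and that the cluster is HEALTH_OK before you start):

# 1. stop placing new data on the OSD and let the cluster rebalance
ceph osd out osd.2
ceph -w                          # wait until all PGs are active+clean again
# 2. stop the daemon on its host (ceph-node3 in this cluster)
systemctl stop ceph-osd@2
# 3. remove it from the CRUSH map, delete its auth key and drop the OSD entry in one step
ceph osd purge osd.2 --yes-i-really-mean-it
ceph osd tree                    # osd.2 should no longer be listed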

pool

List pools

[root@ceph-node1 ~]# ceph osd lspools
1 .rgw.root
2 default.rgw.control
3 default.rgw.meta
4 default.rgw.log
5 ceph-demo
7 cephfs_data
8 cephfs_metadata
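For the replication size, pg_num and flags of every pool in one view, or a single attribute of one pool, the following can be used (a minimal sketch; it shows the same data that appears in the ceph osd dump output above):

# pools with their size / min_size / pg_num / flags
ceph osd pool ls detail
# one attribute of one pool
ceph osd pool get ceph-demo size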

Create/delete a pool

[root@ceph-node1 ~]# ceph osd pool create cephfs_data 32
pool 'cephfs_data' created

Deleting a pool destroys all of the data in it, which makes it about as dangerous as rm -rf. Ceph therefore requires the pool name to be typed twice plus the --yes-i-really-really-mean-it flag. (A runtime alternative to editing ceph.conf is sketched after the example below.)

# delete the pool
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
Error EPERM: pool deletion is disabled; you must first set the mon_allow_pool_delete config option to true before you can destroy a pool
# the error says pool deletion is disabled; mon_allow_pool_delete must be enabled, e.g. by adding the following line to ceph.conf on the mon nodes
[root@ceph-node1 ~]# cat /etc/ceph/ceph.conf  | grep delete
mon_allow_pool_delete = true

# restart the ceph-mon service so the new option takes effect
[root@ceph-node1 ~]# systemctl restart ceph-mon.target

# retry the deletion; it now succeeds
[root@ceph-node1 ~]# ceph osd pool delete cephfs_data cephfs_data --yes-i-really-really-mean-it
pool 'cephfs_data' removed
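As a runtime alternative to editing ceph.conf and restarting the monitors, the option can normally be flipped on the fly; a minimal sketch using the Nautilus centralized configuration (with injectargs as a fallback):

# enable pool deletion for all monitors via the central config store
ceph config set mon mon_allow_pool_delete true
# or inject it into the running monitors without any restart
ceph tell mon.* injectargs --mon-allow-pool-delete=true
# turn it back off once the pool has been removed
ceph config set mon mon_allow_pool_delete false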

View detailed pool statistics

[root@ceph-node1 ~]# rados df
POOL_NAME              USED OBJECTS CLONES COPIES MISSING_ON_PRIMARY UNFOUND DEGRADED RD_OPS      RD WR_OPS      WR USED COMPR UNDER COMPR 
.rgw.root           768 KiB       4      0     12                  0       0        0     33  33 KiB      4   4 KiB        0 B         0 B 
ceph-demo           652 MiB      78      0    234                  0       0        0   1329 7.5 MiB    258 225 MiB        0 B         0 B 
cephfs_data             0 B       0      0      0                  0       0        0      0     0 B      0     0 B        0 B         0 B 
cephfs_metadata     1.5 MiB      22      0     66                  0       0        0      0     0 B     51  19 KiB        0 B         0 B 
default.rgw.control     0 B       8      0     24                  0       0        0      0     0 B      0     0 B        0 B         0 B 
default.rgw.log         0 B     175      0    525                  0       0        0  38706  38 MiB  25844     0 B        0 B         0 B 
default.rgw.meta        0 B       0      0      0                  0       0        0      0     0 B      0     0 B        0 B         0 B 

total_objects    287
total_used       3.7 GiB
total_avail      26 GiB
total_space      30 GiB

View pool usage

[root@ceph-node1 ~]# ceph df
RAW STORAGE:
    CLASS     SIZE       AVAIL      USED        RAW USED     %RAW USED 
    hdd       30 GiB     26 GiB     763 MiB      3.7 GiB         12.49 
    TOTAL     30 GiB     26 GiB     763 MiB      3.7 GiB         12.49 
 
POOLS:
    POOL                    ID     PGS     STORED      OBJECTS     USED        %USED     MAX AVAIL 
    .rgw.root                1      32     1.2 KiB           4     768 KiB         0       8.2 GiB 
    default.rgw.control      2      32         0 B           8         0 B         0       8.2 GiB 
    default.rgw.meta         3      32         0 B           0         0 B         0       8.2 GiB 
    default.rgw.log          4      32         0 B         175         0 B         0       8.2 GiB 
    ceph-demo                5      32     216 MiB          78     652 MiB      2.51       8.2 GiB 
    cephfs_data              7      32        11 B           1     192 KiB         0       8.2 GiB 
    cephfs_metadata          8      32      81 KiB          23     1.9 MiB         0       8.2 GiB

Configure pool quotas

Both an object-count quota and a byte-size quota are supported.

# limit the pool to at most 10 objects
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_objects 10
set-quota max_objects = 10 for pool cephfs_data

# limit the pool's capacity to 12 GiB
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes $((12 * 1024 * 1024 * 1024))
set-quota max_bytes = 12884901888 for pool cephfs_data
# or limit the capacity to 100 MiB
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes 100M
set-quota max_bytes = 104857600 for pool cephfs_data

# check the current quotas
[root@ceph-node1 ~]# ceph osd pool get-quota cephfs_data
quotas for pool 'cephfs_data':
  max objects: 10 objects
  max bytes  : 12 GiB

To remove a quota, set the corresponding value back to 0.

[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_objects 0
set-quota max_objects = 0 for pool cephfs_data
[root@ceph-node1 ~]# ceph osd pool set-quota cephfs_data max_bytes 0
set-quota max_bytes = 0 for pool cephfs_data
[root@ceph-node1 ~]# ceph osd pool get-quota cephfs_data
quotas for pool 'cephfs_data':
  max objects: N/A
  max bytes  : N/A

Create/delete a pool snapshot

# cephfs_data is the pool name
[root@ceph-node1 ~]# ceph osd pool mksnap cephfs_data cephfs_data_snap_2021.04.05
created pool cephfs_data snap cephfs_data_snap_2021.04.05

[root@ceph-node1 ~]# ceph osd pool rmsnap cephfs_data cephfs_data_snap_2021.04.05
removed pool cephfs_data snap cephfs_data_snap_2021.04.05
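Existing pool snapshots can be listed with rados (a minimal sketch):

# list the snapshots of the pool
rados -p cephfs_data lssnap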

View a pool's PG count

Reference:

https://docs.ceph.com/en/nautilus/start/quick-ceph-deploy/#storing-retrieving-object-data
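A minimal sketch for reading (and changing) a pool's PG count with ceph osd pool get/set:

# current pg_num / pgp_num of the pool
ceph osd pool get cephfs_data pg_num
ceph osd pool get cephfs_data pgp_num
# change them (keep pgp_num equal to pg_num)
ceph osd pool set cephfs_data pg_num 64
ceph osd pool set cephfs_data pgp_num 64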

Configuration file details

ceph.conf

  • ceph.conf uses INI-style syntax; # and ; introduce comments. At startup, Ceph daemons load their configuration in a fixed order. The file is divided into the following main sections:
global: settings applied to the whole cluster.
osd: OSD-specific settings; osd.N targets a single OSD, where N is the OSD id (0, 1, 2, ...).
mon: monitor-specific settings; mon.A targets a single monitor, where A is the monitor's name (e.g. ceph-monitor-1, ceph-monitor-2). Use ceph mon dump to list the monitor names.
client: client-specific settings.
  • The configuration file is looked up in several locations, in the following order (the first file found is used):
$CEPH_CONF environment variable
the path given with -c
/etc/ceph/ceph.conf
~/.ceph/ceph.conf
./ceph.conf
  • A few metavariables can be used inside the configuration file, for example (a short expansion example follows this list):
$cluster: the current cluster name.
$type: the current daemon type.
$id: the daemon's identifier.
$host: the hostname the daemon runs on.
$name: expands to $type.$id.
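As an illustration of how these metavariables expand, a hypothetical snippet (not taken from this cluster's ceph.conf):

[mon]
mon data = /var/lib/ceph/mon/$cluster-$id
# for mon.ceph-node1 in the default cluster "ceph" this expands to
#   /var/lib/ceph/mon/ceph-ceph-node1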
  • Annotated ceph.conf parameters
[global]                                         # global settings
fsid = xxxxxxxxxxxxxxx                           # cluster ID
mon host = 10.0.1.1,10.0.1.2,10.0.1.3            # monitor IP addresses
auth cluster required = cephx                    # cluster authentication
auth service required = cephx                    # service authentication
auth client required = cephx                     # client authentication
osd pool default size = 3                        # default number of replicas per pool (default 3)
osd pool default min size = 1                    # min_size is the minimum number of replicas a PG needs to keep accepting I/O; degraded PGs stay writable down to this
public network = 10.0.1.0/24                     # public network (monitor IP range)
cluster network = 10.0.2.0/24                    # cluster (replication) network
max open files = 131072                          # default 0; if set, Ceph raises the system max open fds for the daemon
mon initial members = node1, node2, node3        # initial monitors (as given when the monitors were created)
##############################################################
[mon]
mon data = /var/lib/ceph/mon/ceph-$id
mon clock drift allowed = 1                      # default 0.05; allowed clock drift between monitors (seconds)
mon osd min down reporters = 13                  # default 1; minimum number of OSDs that must report a peer down before the monitor marks it down
mon osd down out interval = 600                  # default 300; seconds Ceph waits after an OSD goes down before marking it out
##############################################################
[osd]
osd data = /var/lib/ceph/osd/ceph-$id
osd mkfs type = xfs                              # filesystem type used when formatting an OSD
osd max write size = 512                         # default 90; maximum size of a single OSD write (MB)
osd client message size cap = 2147483648         # default 100; maximum amount of client data allowed in memory (bytes)
osd deep scrub stride = 131072                   # default 524288; read size used during deep scrub (bytes)
osd op threads = 16                              # default 2; number of threads for concurrent filesystem operations
osd disk threads = 4                             # default 1; threads for disk-intensive work such as recovery and scrubbing
osd map cache size = 1024                        # default 500; OSD map cache kept in memory (MB)
osd map cache bl size = 128                      # default 50; OSD map cache kept in the OSD process memory (MB)
osd mount options xfs = "rw,noexec,nodev,noatime,nodiratime,nobarrier"   # default rw,noatime,inode64; xfs mount options for Ceph OSDs
osd recovery op priority = 2                     # default 10; recovery operation priority, 1-63, higher values consume more resources
osd recovery max active = 10                     # default 15; number of recovery requests active at the same time
osd max backfills = 4                            # default 10; maximum number of backfills allowed per OSD
osd min pg log entries = 30000                   # default 3000; minimum number of PG log entries retained when trimming the PG log
osd max pg log entries = 100000                  # default 10000; maximum number of PG log entries retained when trimming the PG log
osd mon heartbeat interval = 40                  # default 30; interval (seconds) at which an OSD pings a monitor
ms dispatch throttle bytes = 1048576000          # default 104857600; maximum size of messages waiting to be dispatched
objecter inflight ops = 819200                   # default 1024; client-side throttle on unsent I/O requests; above this the client blocks, 0 means unlimited
osd op log threshold = 50                        # default 5; how many operations are shown in one log entry
osd crush chooseleaf type = 0                    # default 1; bucket type used when CRUSH rules call chooseleaf
##############################################################
[client]
rbd cache = true                                 # default true; enable RBD caching
rbd cache size = 335544320                       # default 33554432; RBD cache size (bytes)
rbd cache max dirty = 134217728                  # default 25165824; maximum dirty bytes allowed when the cache is write-back; 0 means write-through
rbd cache max dirty age = 30                     # default 1; how long (seconds) dirty data may sit in the cache before being flushed to disk
rbd cache writethrough until flush = false       # default true; compatibility option for virtio drivers older than linux-2.6.32 that never send flush requests and therefore never trigger writeback;
                                                 # when enabled, librbd performs I/O in writethrough mode until the first flush request arrives, then switches to writeback
rbd cache max dirty object = 2                   # default 0; maximum number of cached objects, 0 means it is derived from rbd cache size; librbd logically splits an image into 4 MB chunks,
                                                 # each chunk is cached as one object, so raising this value can improve performance
rbd cache target dirty = 235544320               # default 16777216; amount of dirty data at which writeback starts; must not exceed rbd_cache_max_dirty