Description of problem:

Manila's new CephFS NFS driver is supposed to rely on the commands exposed by the ceph nfs orchestrator module to interact with the cluster and to add/remove/update share access. When an ingress daemon is deployed on top of several ceph-nfs backend instances (3 instances in this case), mounting the created export through the frontend VIP fails.

Ceph NFS backends
----
networks:
- 172.16.11.0/24
placement:
  hosts:
  - oc0-controller-0
  - oc0-controller-1
  - oc0-controller-2
service_id: nfs
service_name: default
service_type: nfs

Ingress daemon
---
placement:
  hosts:
  - oc0-controller-0
  - oc0-controller-1
  - oc0-controller-2
service_id: ingress
service_name: ingress.ingress
service_type: ingress
spec:
  backend_service: nfs.nfs
  frontend_port: 20490
  monitor_port: 8999
  virtual_interface_networks: 172.16.11.0/24
  virtual_ip: 172.16.11.159

[ceph: root@oc0-controller-0 /]# ceph nfs cluster ls
nfs

[ceph: root@oc0-controller-0 /]# ceph nfs cluster info nfs
{
    "nfs": {
        "virtual_ip": "172.16.11.159",
        "backend": [
            {
                "hostname": "oc0-controller-0",
                "ip": "172.16.11.29",
                "port": 2049
            },
            {
                "hostname": "oc0-controller-1",
                "ip": "172.16.11.35",
                "port": 2049
            },
            {
                "hostname": "oc0-controller-2",
                "ip": "172.16.11.180",
                "port": 2049
            }
        ],
        "port": 20490,
        "monitor_port": 8999
    }
}

[ceph: root@oc0-controller-0 /]# ceph fs volume ls
[
    {
        "name": "cephfs"
    }
]

[ceph: root@oc0-controller-0 /]# ceph fs status cephfs
cephfs - 0 clients
======
RANK  STATE              MDS                ACTIVITY     DNS    INOS   DIRS   CAPS
 0    active  mds.oc0-controller-0.jvxwyy  Reqs:    0 /s    10     13     12      0
      POOL         TYPE     USED  AVAIL
manila_metadata  metadata  64.0k   189G
  manila_data      data       0    126G
        STANDBY MDS
mds.oc0-controller-2.raphea
mds.oc0-controller-1.gyfjey
MDS version: ceph version 16.2.7-464-g7bda236f (7bda236ffb358262a6f354a1dde3c16ed7f586b2) pacific (stable)

[ceph: root@oc0-controller-0 /]# ceph fs subvolume create cephfs cephfs-subvol
[ceph: root@oc0-controller-0 /]# ceph fs subvolume getpath cephfs cephfs-subvol
/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf

CREATE AN EXPORT

[ceph: root@oc0-controller-0 /]# ceph nfs export create cephfs nfs `ceph fs subvolume getpath cephfs cephfs-subvol` cephfs `ceph fs subvolume getpath cephfs cephfs-subvol` --client_addr 192.168.24.7
{
    "bind": "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf",
    "fs": "cephfs",
    "path": "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf",
    "cluster": "nfs",
    "mode": "none"
}

GET THE EXPORT

[ceph: root@oc0-controller-0 /]# ceph nfs export ls nfs
[
    "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf"
]

[ceph: root@oc0-controller-0 /]# ceph nfs export get nfs "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf"
{
    "export_id": 1,
    "path": "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf",
    "cluster_id": "nfs",
    "pseudo": "/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf",
    "access_type": "none",
    "squash": "none",
    "security_label": true,
    "protocols": [
        4
    ],
    "transports": [
        "TCP"
    ],
    "fsal": {
        "name": "CEPH",
        "user_id": "nfs.nfs.1",
        "fs_name": "cephfs"
    },
    "clients": [
        {
            "addresses": [
                "172.16.11.60"
            ],
            "access_type": "rw",
            "squash": "none"
        }
    ]
}

MOUNT THE VOLUME

[root@oc0-ceph-0 ~]# mount.nfs4 -o port=12049 172.16.11.29:/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf /mnt/nfs
[root@oc0-ceph-0 ~]# ls /mnt/nfs/
file00  file02  file04  file06  file08  file10
file01  file03  file05  file07  file09
[root@oc0-ceph-0 ~]# umount /mnt/nfs
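For completeness, the same direct-backend check can be repeated against every ganesha instance before involving the VIP, to confirm that all three backends serve the export identically. A minimal sketch (not part of the original report), using the backend IPs from the `ceph nfs cluster info nfs` output and the port used in the direct mount above; adjust the port if your ganesha daemons listen on 2049:

#!/usr/bin/env bash
# Sketch: mount the export from each ganesha backend directly, list it, unmount.
set -euo pipefail
EXPORT_PATH="/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf"
BACKENDS=(172.16.11.29 172.16.11.35 172.16.11.180)
PORT=12049   # port used in the direct mount above; cluster info reports 2049
MNT=/mnt/nfs

for ip in "${BACKENDS[@]}"; do
    echo "== backend ${ip} =="
    mount.nfs4 -o "port=${PORT}" "${ip}:${EXPORT_PATH}" "${MNT}"
    ls "${MNT}"
    umount "${MNT}"
done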
[root@oc0-ceph-0 ~]# mount.nfs4 -o port=20490 172.16.11.159:/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf /mnt/nfs
mount.nfs4: mounting 172.16.11.159:/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf failed, reason given by server: No such file or directory

From the client added to the export we are able to mount the share by bypassing haproxy and going directly to the backend IP:PORT, while the same mount fails when it goes through the frontend VIP.

Version-Release number of selected component (if applicable):

How reproducible:

Steps to Reproduce:
1.
2.
3.

Actual results:

Expected results:

Additional info:
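A couple of quick checks that could help narrow this down (a sketch, not from the original report): first confirm that the haproxy frontend on the VIP accepts connections at all, then retry the mount verbosely so the failure can be attributed to the server-side export lookup rather than to connectivity. The VIP, port and export path are taken from the transcript above; the haproxy.cfg path follows the usual cephadm daemon layout and is an assumption -- substitute your own fsid and daemon name.

# Is the VIP frontend accepting TCP connections? (pure-bash TCP probe)
timeout 3 bash -c 'exec 3<>/dev/tcp/172.16.11.159/20490' && echo "VIP frontend reachable"

# Retry the mount with verbose output to see where the NFSv4 exchange fails.
mount.nfs4 -v -o port=20490 172.16.11.159:/volumes/_nogroup/cephfs-subvol/c83b1844-0638-48f4-aff2-1c8f762eacaf /mnt/nfs

# On one of the controllers, inspect the haproxy config cephadm generated for
# the ingress service to confirm the frontend/backend wiring (path assumed).
cat /var/lib/ceph/<fsid>/haproxy.ingress.*/haproxy/haproxy.cfg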
Was able to reproduce this issue with

# ceph --version
ceph version 18.2.1-67.el9cp (e63e407e02b2616a7b4504a4f7c5a76f89aad3ce) reef (stable)

# rpm -qa | grep ganesha
nfs-ganesha-selinux-5.7-1.el9cp.noarch
nfs-ganesha-5.7-1.el9cp.x86_64
nfs-ganesha-rgw-5.7-1.el9cp.x86_64
nfs-ganesha-ceph-5.7-1.el9cp.x86_64
nfs-ganesha-rados-grace-5.7-1.el9cp.x86_64
nfs-ganesha-rados-urls-5.7-1.el9cp.x86_64

Deploy NFS-Ganesha service
==========================
[root@argo016 ~]# cat nfs.yaml
networks:
- 10.8.128.0/21
service_type: nfs
service_id: nfsganesha
placement:
  hosts:
  - argo016
  - argo018
  - argo019

[root@argo016 ~]# cephadm shell --mount nfs.yaml:/var/lib/ceph/nfs.yaml
Inferring fsid 6146f1e0-bfb2-11ee-94e2-ac1f6b0a1844
Inferring config /var/lib/ceph/6146f1e0-bfb2-11ee-94e2-ac1f6b0a1844/mon.argo016/config
Using ceph image with id 'c757eefdb83e' and tag 'ceph-7.1-rhel-9-containers-candidate-83807-20240315165811' created on 2024-03-15 17:00:55 +0000 UTC
registry-proxy.engineering.redhat.com/rh-osbs/rhceph@sha256:21152e4c8fd1047fc9e63f738cfe629a8c0c529e924872d6b56434b3fd6dbfb2

[ceph: root@argo016 /]# ceph orch apply -i /var/lib/ceph/nfs.yaml
Scheduled nfs.nfsganesha update...

[ceph: root@argo016 /]# ceph nfs cluster ls
[
  "nfsganesha"
]

[ceph: root@argo016 /]# ceph nfs cluster info nfsganesha
{
  "nfsganesha": {
    "backend": [
      {
        "hostname": "argo016",
        "ip": "10.8.128.216",
        "port": 2049
      },
      {
        "hostname": "argo018",
        "ip": "10.8.128.218",
        "port": 2049
      },
      {
        "hostname": "argo019",
        "ip": "10.8.128.219",
        "port": 2049
      }
    ],
    "virtual_ip": null
  }
}

Deploy Ingress
==============
[root@argo016 ~]# cat ingress.yaml
service_type: ingress
service_id: ingress.ingress
placement:
  count: 3
spec:
  backend_service: nfs.nfsganesha
  frontend_port: 20490
  monitor_port: 8999
  virtual_ip: 10.8.128.101/21
  virtual_interface_networks: 10.8.128.0/21
placement:
  hosts:
  - argo016
  - argo018
  - argo019

[root@argo016 ~]# cephadm shell --mount ingress.yaml:/var/lib/ceph/ingress.yaml
Inferring fsid 6146f1e0-bfb2-11ee-94e2-ac1f6b0a1844
Inferring config /var/lib/ceph/6146f1e0-bfb2-11ee-94e2-ac1f6b0a1844/mon.argo016/config
Using ceph image with id 'c757eefdb83e' and tag 'ceph-7.1-rhel-9-containers-candidate-83807-20240315165811' created on 2024-03-15 17:00:55 +0000 UTC
registry-proxy.engineering.redhat.com/rh-osbs/rhceph@sha256:21152e4c8fd1047fc9e63f738cfe629a8c0c529e924872d6b56434b3fd6dbfb2

[ceph: root@argo016 /]#
[ceph: root@argo016 /]# ceph orch apply -i /var/lib/ceph/ingress.yaml
Scheduled ingress.ingress.ingress update...
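Right after applying the ingress spec, the orchestrator may take a little while to place the haproxy/keepalived daemons and publish the VIP. A small sketch (not part of the original transcript) that waits for the virtual_ip field to flip from null to the configured address before continuing:

# Poll until the ingress VIP shows up in the NFS cluster info
# ("virtual_ip" is null right after the nfs service alone is deployed).
until ceph nfs cluster info nfsganesha | grep -q '"virtual_ip": "10.8.128.101"'; do
    sleep 5
done
ceph nfs cluster info nfsganesha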
[ceph: root@argo016 /]# ceph orch ps | grep nfs
nfs.nfsganesha.0.0.argo016.hmjzui          argo016  10.8.128.216:2049  running (6m)    3s ago   6m  74.2M  -  5.7             c757eefdb83e  d915f8fea7d1
nfs.nfsganesha.1.0.argo018.dojufa          argo018  10.8.128.218:2049  running (6m)    4s ago   6m  75.7M  -  5.7             c757eefdb83e  e52468d76885
nfs.nfsganesha.2.0.argo019.gocaru          argo019  10.8.128.219:2049  running (6m)    4s ago   6m  51.2M  -  5.7             c757eefdb83e  fc499ff75355

[ceph: root@argo016 /]# ceph orch ps | grep ingress
haproxy.ingress.ingress.argo016.rjjnls     argo016  *:20490,8999       running (18s)   5s ago  18s  22.2M  -  2.4.22-f8e3218  5de324a87c1c  92d2dca34046
haproxy.ingress.ingress.argo018.nxppqz     argo018  *:20490,8999       running (21s)   5s ago  21s  13.9M  -  2.4.22-f8e3218  3e1008b4f42f  6ce8b2ab4432
haproxy.ingress.ingress.argo019.yvgcjt     argo019  *:20490,8999       running (20s)   5s ago  20s  15.9M  -  2.4.22-f8e3218  3e1008b4f42f  c5507719f0cd
keepalived.ingress.ingress.argo016.yiwfrd  argo016                     running (17s)   5s ago  17s  1765k  -  2.2.8           2910e3c7c546  f670edd7b0d6
keepalived.ingress.ingress.argo018.ehmmac  argo018                     running (15s)   5s ago  15s  1765k  -  2.2.8           e881fc0c85df  bf6c74b2d3ea
keepalived.ingress.ingress.argo019.lzwyvl  argo019                     running (14s)   5s ago  14s  1761k  -  2.2.8           e881fc0c85df  7613a4359f4f

[ceph: root@argo016 /]# ceph nfs cluster ls
[
  "nfsganesha"
]

[ceph: root@argo016 /]# ceph nfs cluster info nfsganesha
{
  "nfsganesha": {
    "backend": [
      {
        "hostname": "argo016",
        "ip": "10.8.128.216",
        "port": 2049
      },
      {
        "hostname": "argo018",
        "ip": "10.8.128.218",
        "port": 2049
      },
      {
        "hostname": "argo019",
        "ip": "10.8.128.219",
        "port": 2049
      }
    ],
    "monitor_port": 8999,
    "port": 20490,
    "virtual_ip": "10.8.128.101"
  }
}

[ceph: root@argo016 /]# ceph fs volume ls
[
  {
    "name": "cephfs"
  }
]

Create NFS-Ganesha export
=========================
[ceph: root@argo016 /]# ceph fs subvolume create cephfs cephfs-subvol
[ceph: root@argo016 /]# ceph fs subvolume getpath cephfs cephfs-subvol
/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc

[ceph: root@argo016 /]# ceph nfs export create cephfs nfsganesha `ceph fs subvolume getpath cephfs cephfs-subvol` cephfs `ceph fs subvolume getpath cephfs cephfs-subvol` --client_addr 10.8.128.221
{
  "bind": "/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc",
  "fs": "cephfs",
  "path": "/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc",
  "cluster": "nfs",
  "mode": "none"
}

[ceph: root@argo016 /]# ceph nfs export ls nfsganesha
[
  "/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc"
]

[ceph: root@argo016 /]# ceph nfs export info nfsganesha /volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc
{
  "access_type": "none",
  "clients": [
    {
      "access_type": "rw",
      "addresses": [
        "10.8.128.221"
      ],
      "squash": "none"
    }
  ],
  "cluster_id": "nfsganesha",
  "export_id": 1,
  "fsal": {
    "cmount_path": "/",
    "fs_name": "cephfs",
    "name": "CEPH",
    "user_id": "nfs.nfsganesha.cephfs"
  },
  "path": "/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc",
  "protocols": [
    3,
    4
  ],
  "pseudo": "/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc",
  "security_label": true,
  "squash": "none",
  "transports": [
    "TCP"
  ]
}

Mount the export on client
==========================
1. Via - bypassing haproxy and going directly to the backend IP:PORT

[root@argo021 mnt]# mount -t nfs -o vers=4.1,port=2049 10.8.128.216:/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc /mnt/ganesha/
[root@argo021 mnt]#
[root@argo021 mnt]# cd /mnt/ganesha/
[root@argo021 ganesha]# ls
[root@argo021 ganesha]# touch f1
2. Via - Mount using the frontend VIP

[root@argo021 mnt]# mount -t nfs -o vers=4.1,port=20490 10.8.128.101:/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc /mnt/ganesha/
mount.nfs: mounting 10.8.128.101:/volumes/_nogroup/cephfs-subvol/6967443c-13a8-4c54-a8d8-f1be843abdbc failed, reason given by server: No such file or directory
[root@argo021 mnt]#
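The verification below was done on a newer reef build and, notably, sets enable_haproxy_protocol: True in both the nfs and the ingress specs. That option turns on the PROXY protocol between haproxy and the ganesha backends so that the server sees the real client address rather than the ingress node's address, which matters for exports restricted with --client_addr like the ones above. A minimal sketch (not from the original comments) of rolling that change onto an already-deployed cluster by re-applying the edited specs; the file paths follow the transcripts above and the full spec contents are shown in the verification section:

# Re-apply the specs after adding "enable_haproxy_protocol: True" to each.
ceph orch apply -i /var/lib/ceph/nfs.yaml
ceph orch apply -i /var/lib/ceph/ingress.yaml

# Watch the nfs and haproxy daemons get reconfigured.
ceph orch ps --daemon-type nfs
ceph orch ps --daemon-type haproxy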
Verified this BZ with

# ceph --version
ceph version 18.2.1-89.el9cp (926619fe7135cbd6d305b46782ee7ecc7be199a3) reef (stable)

# rpm -qa | grep nfs
libnfsidmap-2.5.4-20.el9.x86_64
nfs-utils-2.5.4-20.el9.x86_64
nfs-ganesha-selinux-5.7-2.el9cp.noarch
nfs-ganesha-5.7-2.el9cp.x86_64
nfs-ganesha-rgw-5.7-2.el9cp.x86_64
nfs-ganesha-ceph-5.7-2.el9cp.x86_64
nfs-ganesha-rados-grace-5.7-2.el9cp.x86_64
nfs-ganesha-rados-urls-5.7-2.el9cp.x86_64

[root@cali013 ~]# cat nfs.yaml
networks:
- 10.8.128.0/21
service_type: nfs
service_id: nfsganesha
placement:
  hosts:
  - cali013
  - cali015
  - cali016
enable_haproxy_protocol: True

[root@cali013 ~]# cephadm shell --mount nfs.yaml:/var/lib/ceph/nfs.yaml
Inferring fsid 4e687a60-638e-11ee-8772-b49691cee574
Inferring config /var/lib/ceph/4e687a60-638e-11ee-8772-b49691cee574/mon.cali013/config
Using ceph image with id '2abcbe3816d6' and tag 'ceph-7.1-rhel-9-containers-candidate-63457-20240326021251' created on 2024-03-26 02:15:29 +0000 UTC
registry-proxy.engineering.redhat.com/rh-osbs/rhceph@sha256:358fc7e11068221bbe1a0172e0f056bfd47cf7f1a983bbb8d6d238d3be21f5eb

[ceph: root@cali013 /]# ceph orch apply -i /var/lib/ceph/nfs.yaml
Scheduled nfs.nfsganesha update...
[ceph: root@cali013 /]#
[ceph: root@cali013 /]# ceph nfs cluster ls
[
  "nfsganesha"
]

[ceph: root@cali013 /]# ceph nfs cluster info nfsganesha
{
  "nfsganesha": {
    "backend": [
      {
        "hostname": "cali013",
        "ip": "10.8.130.13",
        "port": 2049
      },
      {
        "hostname": "cali015",
        "ip": "10.8.130.15",
        "port": 2049
      },
      {
        "hostname": "cali016",
        "ip": "10.8.130.16",
        "port": 2049
      }
    ],
    "virtual_ip": null
  }
}

Deploy Ingress
==============
[root@cali013 ~]# cat ingress.yaml
service_type: ingress
service_id: ingress.ingress
placement:
  count: 3
spec:
  backend_service: nfs.nfsganesha
  frontend_port: 20490
  monitor_port: 8999
  virtual_ip: 10.8.130.236/21
  virtual_interface_networks: 10.8.128.0/21
placement:
  hosts:
  - cali013
  - cali015
  - cali016
enable_haproxy_protocol: True

[root@cali013 ~]# cephadm shell --mount ingress.yaml:/var/lib/ceph/ingress.yaml
Inferring fsid 4e687a60-638e-11ee-8772-b49691cee574
Inferring config /var/lib/ceph/4e687a60-638e-11ee-8772-b49691cee574/mon.cali013/config
Using ceph image with id '2abcbe3816d6' and tag 'ceph-7.1-rhel-9-containers-candidate-63457-20240326021251' created on 2024-03-26 02:15:29 +0000 UTC
registry-proxy.engineering.redhat.com/rh-osbs/rhceph@sha256:358fc7e11068221bbe1a0172e0f056bfd47cf7f1a983bbb8d6d238d3be21f5eb

[ceph: root@cali013 /]# ceph orch apply -i /var/lib/ceph/ingress.yaml
Scheduled ingress.ingress.ingress update...
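To double-check that the flag actually landed in the stored service specs after the apply, the orchestrator can dump them back out. A small sketch (the grep pattern is only illustrative); the --export output should list enable_haproxy_protocol if it was applied:

ceph orch ls nfs --export | grep -i haproxy
ceph orch ls ingress --export | grep -i haproxy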
[ceph: root@cali013 /]# ceph orch ps | grep nfs
nfs.nfsganesha.0.0.cali013.dirtjt          cali013  10.8.130.13:2049  running (3m)    4s ago   3m  71.0M  -  5.7             2abcbe3816d6  9446260c287a
nfs.nfsganesha.1.0.cali015.cqcbwx          cali015  10.8.130.15:2049  running (3m)    3m ago   3m  20.5M  -  5.7             2abcbe3816d6  4f9a2005047d
nfs.nfsganesha.2.0.cali016.jpzjba          cali016  10.8.130.16:2049  running (3m)    1s ago   3m  73.1M  -  5.7             2abcbe3816d6  feef5000e4d5

[ceph: root@cali013 /]# ceph orch ps | grep ingress
haproxy.ingress.ingress.cali013.onqkdd     cali013  *:20490,8999      running (26s)   7s ago  26s  37.7M  -  2.4.22-f8e3218  46ae9bd034c8  794a6f08bef5
haproxy.ingress.ingress.cali015.wounig     cali015  *:20490,8999      running (34s)   1s ago  34s  37.7M  -  2.4.17-9f97155  bda92490ac6c  e5f18811e7bd
haproxy.ingress.ingress.cali016.auvcmk     cali016  *:20490,8999      running (33s)   4s ago  33s  39.7M  -  2.4.22-f8e3218  5de324a87c1c  4fe8fdec814f
keepalived.ingress.ingress.cali013.bnobqk  cali013                    running (19s)   7s ago  19s  1765k  -  2.2.8           f6f3a07d6384  486922ab774f
keepalived.ingress.ingress.cali015.pwdkhq  cali015                    running (18s)   1s ago  18s  1770k  -  2.2.4           b79b516c07ed  dc4ac47510d2
keepalived.ingress.ingress.cali016.kgbrcd  cali016                    running (17s)   4s ago  17s  1774k  -  2.2.8           2910e3c7c546  5cf01d51dea6

[ceph: root@cali013 /]# ceph nfs cluster ls
[
  "nfsganesha"
]

[ceph: root@cali013 /]# ceph nfs cluster info nfsganesha
{
  "nfsganesha": {
    "backend": [
      {
        "hostname": "cali013",
        "ip": "10.8.130.13",
        "port": 2049
      },
      {
        "hostname": "cali015",
        "ip": "10.8.130.15",
        "port": 2049
      },
      {
        "hostname": "cali016",
        "ip": "10.8.130.16",
        "port": 2049
      }
    ],
    "monitor_port": 8999,
    "port": 20490,
    "virtual_ip": "10.8.130.236"
  }
}

[ceph: root@cali013 /]# ceph fs volume ls
[
  {
    "name": "cephfs"
  }
]

Create NFS-Ganesha export
=========================
[ceph: root@cali013 /]# ceph nfs export create cephfs nfsganesha /ganesha1 cephfs --path=/volumes/subgroup0/sub0/536d7252-d1bf-45ba-93d0-f15649a1e002
{
  "bind": "/ganesha1",
  "cluster": "nfsganesha",
  "fs": "cephfs",
  "mode": "RW",
  "path": "/volumes/subgroup0/sub0/536d7252-d1bf-45ba-93d0-f15649a1e002"
}

[ceph: root@cali013 /]# ceph nfs export info nfsganesha /ganesha1
{
  "access_type": "RW",
  "clients": [],
  "cluster_id": "nfsganesha",
  "export_id": 1,
  "fsal": {
    "cmount_path": "/",
    "fs_name": "cephfs",
    "name": "CEPH",
    "user_id": "nfs.nfsganesha.cephfs"
  },
  "path": "/volumes/subgroup0/sub0/536d7252-d1bf-45ba-93d0-f15649a1e002",
  "protocols": [
    3,
    4
  ],
  "pseudo": "/ganesha1",
  "security_label": true,
  "squash": "none",
  "transports": [
    "TCP"
  ]
}

Mount the export on client
==========================
1. Via - bypassing haproxy and going directly to the backend IP:PORT

[root@ceph-msaini-faptco-node7 mnt]# mount -t nfs -o vers=4.1,port=2049 10.8.130.13:/ganesha1 /mnt/ganesha/
[root@ceph-msaini-faptco-node7 mnt]# cd /mnt/ganesha/
[root@ceph-msaini-faptco-node7 ganesha]# ls
dir1  dir2  file_ops  pynfs  tmp  tree
[root@ceph-msaini-faptco-node7 ganesha]# touch f1
[root@ceph-msaini-faptco-node7 ganesha]# cd ..
[root@ceph-msaini-faptco-node7 mnt]# umount /mnt/ganesha/

2. Via - Mount using the frontend VIP

[root@ceph-msaini-faptco-node7 mnt]# mount -t nfs -o vers=4.1,port=20490 10.8.130.236:/ganesha1 /mnt/ganesha/
[root@ceph-msaini-faptco-node7 mnt]# cd /mnt/ganesha/
[root@ceph-msaini-faptco-node7 ganesha]# ls
dir1  dir2  f1  file_ops  pynfs  tmp  tree
[root@ceph-msaini-faptco-node7 ganesha]#
Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory (Critical: Red Hat Ceph Storage 7.1 security, enhancements, and bug fix update), and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report.

https://access.redhat.com/errata/RHSA-2024:3925