Hello,
this whole shebang has already taken me nearly a week to figure out, and I'm still stuck.
Background:
I want to spin up a five-VM Docker Swarm cluster. The VMs use keepalived for HA/failover over a simple SDN, and that part seems to be working fine.
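For reference, the keepalived part is just a plain VRRP virtual-IP setup on the SDN network, roughly like this (interface name, VIP and password below are placeholders, not our real values):
Code:
vrrp_instance docker_swarm {
    state BACKUP
    interface ens19              # NIC on the SDN (keepnet) network
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass changeme       # placeholder
    }
    virtual_ipaddress {
        192.168.0.5/29           # shared VIP inside the keepnet subnet (placeholder)
    }
}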
Now I "only" need persistent storage. The original plan was to create a sixth VM with an extra-large disk and Cockpit to manage NFS shares. Since I don't want to keep yet another VM up to date, the new plan is to create a CephFS and mount it into the VMs.
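The end goal on each Docker VM would be a permanent CephFS mount, i.e. an fstab entry along these lines (a sketch; mount point, client name and secret file path are placeholders):
Code:
# /etc/fstab on a Docker VM (sketch)
10.16.10.201:6789,10.16.10.202:6789,10.16.10.203:6789:/  /mnt/dockerFS  ceph  name=admin,secretfile=/etc/ceph/admin.secret,noatime,_netdev  0  0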
Our network on the PVE nodes is set up as follows:

The public Ceph network runs on bond900. According to the documentation our now-former colleague wrote, that network uses VLAN tag 2160.
So I created a Linux VLAN (bond900.2160) and, on top of it, a bridge (vmbr900).
Here is the configuration from the /etc/network/interfaces file of one of our Proxmox nodes:
Code:
auto lo
iface lo inet loopback
iface eno3 inet manual
auto enp134s0f0
iface enp134s0f0 inet manual
#bond900
auto enp134s0f1
iface enp134s0f1 inet manual
#bond901
auto enp175s0f0
iface enp175s0f0 inet manual
#bond900
auto enp175s0f1
iface enp175s0f1 inet manual
#bond901
auto enp216s0f0
iface enp216s0f0 inet manual
#bond0
auto enp216s0f1
iface enp216s0f1 inet static
    address 10.16.1.201/24
#Corosync Ring1
iface eno1 inet manual
iface eno2 inet manual
auto enp24s0f0
iface enp24s0f0 inet manual
#bond0
auto enp24s0f1
iface enp24s0f1 inet static
    address 10.16.0.201/24
#Corosync Ring0
iface eno4 inet manual
iface eno5 inet manual
iface eno6 inet manual
auto bond0
iface bond0 inet manual
    bond-slaves enp216s0f0 enp24s0f0
    bond-miimon 100
    bond-mode 802.3ad
    bond-xmit-hash-policy layer2
#  Net
auto bond900
iface bond900 inet static
    address 10.16.10.201/24
    bond-slaves enp134s0f0 enp175s0f0
    bond-miimon 100
    bond-mode 802.3ad
    bond-xmit-hash-policy layer3+4
#CEPH public
auto bond901
iface bond901 inet static
    address 10.16.11.201/24
    bond-slaves enp134s0f1 enp175s0f1
    bond-miimon 100
    bond-mode 802.3ad
    bond-xmit-hash-policy layer3+4
#CEPH private
auto bond0.15
iface bond0.15 inet manual
#  Gast
auto bond0.22
iface bond0.22 inet manual
#  VOIP FN
auto bond0.921
iface bond0.921 inet manual
#  DMZ
auto bond900.2160
iface bond900.2160 inet manual
auto vmbr0
iface vmbr0 inet static
    address 172.16.15.201/16
    gateway 172.16.0.1
    bridge-ports bond0
    bridge-stp off
    bridge-fd 0
#  Intern
auto vmbr1
iface vmbr1 inet manual
    bridge-ports bond0.15
    bridge-stp off
    bridge-fd 0
#  Gast
auto vmbr2
iface vmbr2 inet manual
    bridge-ports bond0.22
    bridge-stp off
    bridge-fd 0
#  VOIP FN
auto vmbr3
iface vmbr3 inet manual
    bridge-ports bond0.921
    bridge-stp off
    bridge-fd 0
#  DMZ
auto vmbr900
iface vmbr900 inet manual
    bridge-ports bond900.2160
    bridge-stp off
    bridge-fd 0
#CEPH Public
source /etc/network/interfaces.d/*
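To double-check that the VLAN and the bridge actually exist on the node, these are the kinds of commands I run (output omitted here, I can post it if it helps):
Code:
# on the PVE node
ip -d link show bond900.2160                 # VLAN device and its tag
ip -d link show vmbr900                      # bridge device state
bridge link show | grep -E 'bond900\.2160'   # is the VLAN enslaved to vmbr900?
ip addr show bond900                         # note: the Ceph public IP sits on the untagged bond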
Here is the SDN file (/etc/network/interfaces.d/sdn):
Code:
#version:12
auto keepnet
iface keepnet
    address 192.168.0.1/29
    bridge_ports none
    bridge_stp off
    bridge_fd 0
    alias DockerKeepaliveD
    ip-forward on
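For completeness, the VMs get their extra NICs by bridging them onto these bridges/vnets, e.g. (the VM ID here is a placeholder):
Code:
# on the PVE node; 101 is a placeholder VM ID
qm set 101 --net1 virtio,bridge=keepnet      # SDN / keepalived network
qm set 101 --net2 virtio,bridge=vmbr900      # Ceph public network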
And, lastly, the /etc/ceph/ceph.conf file:
Code:
[global]
     auth_client_required = cephx
     auth_cluster_required = cephx
     auth_service_required = cephx
     cluster_network = 10.16.11.201/24
     fsid = aea7d06a-18ce-4e6c-9381-ad31953d6717
     mon_allow_pool_delete = true
     mon_host = 10.16.10.201 10.16.10.202 10.16.10.203
     ms_bind_ipv4 = true
     ms_bind_ipv6 = false
     osd_pool_default_min_size = 2
     osd_pool_default_size = 3
     public_network = 10.16.10.201/24
[client]
     keyring = /etc/pve/priv/$cluster.$name.keyring
[mds]
     keyring = /var/lib/ceph/mds/ceph-$id/keyring
[mds.pvefn01-dockerCephFS]
     host = pvefn01
     mds_standby_for_name = pve
[mds.pvefn02-dockerCephFS]
     host = pvefn02
     mds_standby_for_name = pve
[mds.pvefn03-dockerCephFS]
     host = pvefn03
     mds_standby_for_name = pve
[mon.pvefn01]
     public_addr = 10.16.10.201
[mon.pvefn02]
     public_addr = 10.16.10.202
[mon.pvefn03]
     public_addr = 10.16.10.203
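In case it is relevant: these are the checks I can run on one of the PVE nodes to show the CephFS/MDS state (I can post the output if needed):
Code:
# on one of the PVE nodes
ceph -s                        # overall cluster health
ceph fs status                 # CephFS overview incl. active/standby MDS
ceph mds stat                  # compact MDS state
ceph auth get client.admin     # the key that was copied to the Docker VM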
Now I did what I found in other threads: I copied the key file onto the Docker node and tried to mount the CephFS with
Code:
mount.ceph admin@<storage-id>.cephfs=/ /mnt/dockerFS -o 'secretfile=/etc/ceph/admin.keyring,mon_addr=10.16.10.201:6789/10.16.10.202:6789/10.16.10.203:6789'
I get the error:
Code:
mount error: no mds (Metadata Server) is up. The cluster might be laggy, or you may not be authorized
A ping to the Ceph public network address also doesn't work ("From 10.16.10.101 [...] Destination Host Unreachable").
Strangely enough, I can ping 10.16.10.201 when I explicitly use the SDN network's device:
Code:
~$ ping 10.16.10.201 -I ens19
PING 10.16.10.201 (10.16.10.201) from 192.168.0.2 ens19: 56(84) bytes of data.
64 bytes from 10.16.10.201: icmp_seq=10 ttl=64 time=0.264 ms
64 bytes from 10.16.10.201: icmp_seq=11 ttl=64 time=0.137 ms
64 bytes from 10.16.10.201: icmp_seq=12 ttl=64 time=0.123 ms
64 bytes from 10.16.10.201: icmp_seq=13 ttl=64 time=0.202 ms
^C
--- 10.16.10.201 ping statistics ---
13 packets transmitted, 4 received, 69.2308% packet loss, time 12281ms
rtt min/avg/max/mdev = 0.123/0.181/0.264/0.056 ms
Currently I'm out of ideas. Does anyone here have an idea?
	
	
