一. 简介

记录如何通过ip命令创建vxlan虚接口。

分析在创建接口和up接口期间,vxlan模块执行的初始化动作。

二. ip命令

zrf@debian:~$ ip link add type vxlan help
Usage: ... vxlan id VNI
[ { group | remote } IP_ADDRESS ]
[ local ADDR ]
[ ttl TTL ]
[ tos TOS ]
[ df DF ]
[ flowlabel LABEL ]
[ dev PHYS_DEV ]
[ dstport PORT ]
[ srcport MIN MAX ]
[ reserved_bits VALUE ]
[ [no]learning ]
[ [no]proxy ]
[ [no]rsc ]
[ [no]l2miss ]
[ [no]l3miss ]
[ ageing SECONDS ]
[ maxaddress NUMBER ]
[ [no]udpcsum ]
[ [no]udp6zerocsumtx ]
[ [no]udp6zerocsumrx ]
[ [no]remcsumtx ] [ [no]remcsumrx ]
[ [no]localbypass ]
[ [no]external ] [ gbp ] [ gpe ]
[ [no]vnifilter ]

Where: VNI := 0-16777215
ADDR := { IP_ADDRESS | any }
TOS := { NUMBER | inherit }
TTL := { 1..255 | auto | inherit }
DF := { unset | set | inherit }
LABEL := 0-1048575

通常情况下,会使用以下命令创建一个vxlan虚接口:

ip link add vx100 type vxlan vni 100 remote 192.168.206.1 dstport 4789
ip link set dev vx100 up

三. 创建虚接口

3.1 模块初始化

vxlan模块初始化时,会通过rtnl_link_register注册vxlan类型的rtnetlink操作函数集vxlan_link_ops:

/*
 * rtnetlink link operations for the "vxlan" link kind.
 * Registered through rtnl_link_register() in vxlan_init_module().
 */
static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
/* link kind name; matches "type vxlan" on the ip command line */
.kind = "vxlan",
.maxtype = IFLA_VXLAN_MAX,
.policy = vxlan_policy,
/* size of the per-device private area (struct vxlan_dev) */
.priv_size = sizeof(struct vxlan_dev),
/* invoked as ops->setup(dev) while the net_device is allocated */
.setup = vxlan_setup,
.validate = vxlan_validate,
/* invoked as ops->newlink(...) when a new vxlan link is created */
.newlink = vxlan_newlink,
.changelink = vxlan_changelink,
.dellink = vxlan_dellink,
.get_size = vxlan_get_size,
.fill_info = vxlan_fill_info,
.get_link_net = vxlan_get_link_net,
};

/*
 * Module init (abridged excerpt): registers vxlan_link_ops with rtnetlink.
 * NOTE(review): only the rtnl_link_register() call is shown; the
 * declaration of rc, error handling and the return statement are elided.
 */
static int __init vxlan_init_module(void)
{
rc = rtnl_link_register(&vxlan_link_ops);
}

3.2 创建net device实例

执行ip link add vx100 type vxlan命令时,rtnl模块的rtnetlink_rcv_msg会接收来自ip进程的配置,并交由rtnl_newlink处理。

从下面的代码可以看出,创建vxlan接口时,如果配置了对端地址(remote_ip不为any),vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip)会在接口转发表中添加一个全0的mac表项,可以通过bridge fdb show dev vx100查询:

zrf@debian:~$ sudo bridge fdb show dev vx100
7e:4d:ac:46:22:fb vlan 1 master br1 permanent
7e:4d:ac:46:22:fb master br1 permanent
00:00:00:00:00:00 dst 192.168.206.250 self permanent

/*
 * Call trace (pseudo-code, not compilable C) for creating a vxlan link,
 * starting at rtnl_newlink(). A brace block following a call shows the
 * relevant part of that callee's body; unrelated code is omitted.
 */
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack) {
__rtnl_newlink() {
rtnl_newlink_create() {
dev = rtnl_create_link(tgt_net, ifname, name_assign_type, ops, tb, extack) {
dev = alloc_netdev_mqs(ops->priv_size, ifname, ops->setup) {
/* allocate the net_device together with sizeof_priv bytes of private data */
dev = kvzalloc(struct_size(dev, priv, sizeof_priv));
/* vxlan_setup() */
ops->setup(dev) {
struct vxlan_dev *vxlan = netdev_priv(dev);
INIT_HLIST_HEAD(&vxlan->fdb_list);
}
}
}
/* vxlan_newlink() */
ops->newlink(dev, &params, extack) {
__vxlan_dev_create(link_net, dev, &conf, extack) {
vxlan_dev_configure(net, dev, conf, extack) {
vxlan_config_apply() {
vxlan_ether_setup(dev) {
/* Important: installs the vxlan netdev ops, so the later dev->netdev_ops->ndo_init() call reaches vxlan_init() */
dev->netdev_ops = &vxlan_netdev_ether_ops;
}
/* record the configured remote IP in dst and keep a copy of the whole config in vxlan->cfg */
memcpy(&dst->remote_ip, &conf->remote_ip, sizeof(conf->remote_ip));
memcpy(&vxlan->cfg, conf, sizeof(*conf));
}
}
register_netdevice(dev) {
/* vxlan_init() */
dev->netdev_ops->ndo_init(dev) {
/* the FDB hash table of this device is initialised here */
rhashtable_init(&vxlan->fdb_hash_tbl, &vxlan_fdb_rht_params);
}
list_netdevice(dev) {
/* link the device into the device list of its net namespace */
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
}
}
/* only when a remote IP was configured: add the all-zero-MAC FDB entry pointing at that remote */
if (!vxlan_addr_any(&dst->remote_ip)) {
vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip) {
vxlan_fdb_update_create(vxlan, mac, ip) {
vxlan_fdb_create(vxlan, mac, ip) {
/* allocate the FDB entry, attach the remote destination, then insert it into the hash table */
f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
rhashtable_lookup_insert_fast(&vxlan->fdb_hash_tbl, &f->rhnode);
}
}
}
}
}
}
}
}
}

3.3 up接口

执行ip link set dev vx100 up命令时,rtnl模块的rtnetlink_rcv_msg会接收来自ip进程的配置,并交由rtnl_setlink处理。

从下面的代码可以看出,up了vxlan接口后,vxlan_socket_create(vxlan->net, ipv6, vxlan->cfg.dst_port, ...)会创建一个监听dstport的UDP socket,并通过vxlan_rcv接收来自对端的vxlan数据包,可以通过netstat -anpu查看:

zrf@debian:~$ sudo netstat -anpu | grep 4789
udp 0 0 0.0.0.0:4789 0.0.0.0:*

/*
 * Call trace (pseudo-code, not compilable C) for bringing a vxlan link up,
 * starting at rtnl_setlink(). A brace block following a call shows the
 * relevant part of that callee's body; unrelated code is omitted.
 */
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0) {
netif_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), extack) {
__dev_change_flags(dev, flags, extack) {
__dev_open(dev, extack) {
/* vxlan_open() */
ops->ndo_open(dev) {
vxlan_sock_add(vxlan) {
__vxlan_sock_add(vxlan, false) {
/* create the socket for the configured destination port (vxlan->cfg.dst_port) */
vxlan_socket_create(vxlan->net, ipv6,
vxlan->cfg.dst_port, vxlan->cfg.flags,
l3mdev_index) {
struct vxlan_sock *vs;
vs = kzalloc(sizeof(*vs), GFP_KERNEL);
/* the underlying UDP socket is created through udp_sock_create() */
vs->sock = vxlan_create_sock(net, ipv6, port, flags, ifindex) {
udp_sock_create(net, &udp_conf, &sock);
}
/* Important: vxlan_rcv is set as the encap receive callback of the UDP tunnel socket */
tunnel_cfg.encap_rcv = vxlan_rcv;
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
}
}
}
vxlan_multicast_join(vxlan);
}
}
}
}
}
return err;
}