TYPE-2
添加头端复制表项,这种很少出现,一般来说,对端会发送type3类型的路由用于vtep发现
/*
 * Program a remote VTEP into the kernel for head-end replication.
 *
 * The kernel entry is added only when is_vxlan_flooding_head_end() is true
 * and the VTEP's flood_control is VXLAN_FLOOD_HEAD_END_REPL. In every other
 * case nothing is installed.
 *
 * Returns the result of kernel_add_vtep() when an entry is added, 0 otherwise.
 */
static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep)
{
	if (!is_vxlan_flooding_head_end())
		return 0;

	if (zvtep->flood_control != VXLAN_FLOOD_HEAD_END_REPL)
		return 0;

	/* Add the head-end replication entry to the kernel. */
	return kernel_add_vtep(zvni->vni, zvni->vxlan_if, &zvtep->vtep_ip);
}
添加mac表项(用于同子网转发)
/*
 * Install a remote MAC into the kernel.
 *
 * MACs without ZEBRA_MAC_REMOTE set are ignored (returns 0). Returns -1 when
 * the VNI's VxLAN interface has no zebra interface info attached; otherwise
 * returns the result of kernel_add_mac().
 */
static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
{
	struct zebra_if *vxlan_zif;
	struct zebra_l2info_vxlan *vxlan_info;
	bool is_sticky;

	/* Only remote MACs are handled here. */
	if (!(mac->flags & ZEBRA_MAC_REMOTE))
		return 0;

	vxlan_zif = zvni->vxlan_if->info;
	if (!vxlan_zif)
		return -1;

	vxlan_info = &vxlan_zif->l2info.vxl;

	/* The entry is sticky if either the sticky or the remote
	 * default-gateway flag is set on the MAC.
	 */
	is_sticky = CHECK_FLAG(mac->flags, (ZEBRA_MAC_STICKY
					    | ZEBRA_MAC_REMOTE_DEF_GW))
		    != 0;

	return kernel_add_mac(zvni->vxlan_if, vxlan_info->access_vlan,
			      &mac->macaddr, mac->fwd_info.r_vtep_ip,
			      is_sticky);
}
添加邻居表项(跨子网报文转发时,用作内层目的mac)
/*
 * Install a remote neighbor into the kernel.
 *
 * Neighbors without ZEBRA_NEIGH_REMOTE set are ignored (returns 0). Returns
 * -1 when the VxLAN interface has no zebra interface info or when
 * zvni_map_to_svi() finds no interface for the access VLAN.
 *
 * The kernel entry is only programmed when GNU_LINUX is defined; in that case
 * the neighbor is also marked active and the result of kernel_add_neigh() is
 * returned. Without GNU_LINUX the checks above still run and 0 is returned.
 */
static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n)
{
	struct zebra_if *vxlan_zif;
	struct zebra_l2info_vxlan *vxlan_info;
	struct interface *svi_if;
	int rc = 0;
#ifdef GNU_LINUX
	uint8_t ntf_flags = NTF_EXT_LEARNED;
#endif

	/* Only remote neighbors are handled here. */
	if (!(n->flags & ZEBRA_NEIGH_REMOTE))
		return 0;

	vxlan_zif = zvni->vxlan_if->info;
	if (!vxlan_zif)
		return -1;

	vxlan_info = &vxlan_zif->l2info.vxl;
	svi_if = zvni_map_to_svi(vxlan_info->access_vlan,
				 vxlan_zif->brslave_info.br_if);
	if (!svi_if)
		return -1;

#ifdef GNU_LINUX
	/* Propagate the router flag to the kernel entry when it is set. */
	if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG)
		ntf_flags |= NTF_ROUTER;

	ZEBRA_NEIGH_SET_ACTIVE(n);
	rc = kernel_add_neigh(svi_if, &n->ip, &n->emac, ntf_flags);
#endif

	return rc;
}
/*
 * Add a neighbor entry to the kernel in the NUD_NOARP state.
 *
 * Forwards its arguments to netlink_neigh_update2() with the RTM_NEWNEIGH
 * command and returns that call's result.
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
		     struct ethaddr *mac, uint8_t flags)
{
	const int rc = netlink_neigh_update2(ifp, ip, mac, flags, NUD_NOARP,
					     RTM_NEWNEIGH);

	return rc;
}
这里不需要额外添加路由:创建bdif时,该bdif会作为本l2vni的网关,在其上配置IP后会自动生成本网段的网段路由,再结合上面的邻居表项即可完成跨子网路由转发。
注: 对于集中式路由网关,设置了default-gw标志的话,发布的本地的mac/ip消息在设置邻居表时标志为NUD_NOARP。如果是携带sticky标志也会是这种类型的邻居,其它的是NTF_EXT_LEARNED表项。
TYPE-3
添加mac值为全零的头端复制fdb表项
/*
 * Program a remote VTEP into the kernel for head-end replication.
 *
 * The kernel entry is added only when is_vxlan_flooding_head_end() is true
 * and the VTEP's flood_control is VXLAN_FLOOD_HEAD_END_REPL. In every other
 * case nothing is installed.
 *
 * Returns the result of kernel_add_vtep() when an entry is added, 0 otherwise.
 */
static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep)
{
	if (!is_vxlan_flooding_head_end())
		return 0;

	if (zvtep->flood_control != VXLAN_FLOOD_HEAD_END_REPL)
		return 0;

	/* Add the head-end replication entry to the kernel. */
	return kernel_add_vtep(zvni->vni, zvni->vxlan_if, &zvtep->vtep_ip);
}
TYPE-5
FRR-BGP对于网段路由采用的是interface-less模型,如下图所示:
在linux内核中是如下配置:
右边的VTEP的IP为10.200.200.1(underlay-ip),其路由mac为0200.0ade.de01(这个是overlay的mac,通常作为内层报文的mac)。当右边的设备发布一条192.168.1.0/24的网段路由的时候,左边的BGP将会收到如下所示的type-5类型的路由:
可以看到其NLRI中的前缀为192.168.1.0/24,下一跳属性为10.200.200.1(是一个underlay地址)。同时使用扩展路由mac团体携带了overlay网关的mac(0200.0ade.de01),还携带了l3vni。左边的设备收到该地址后会进行处理。
在指定vrf中安装路由
/*
 * Create an IPv4 nexthop with an explicit interface index and attach it to
 * a route entry.
 *
 * re        - route entry the new nexthop is added to
 * ipv4      - gateway address, copied into the nexthop
 * src       - optional source address; copied only when non-NULL
 * ifindex   - outgoing interface index
 * nh_vrf_id - VRF the nexthop (and the interface lookup) belongs to
 *
 * Returns the newly created nexthop.
 */
struct nexthop *route_entry_nexthop_ipv4_ifindex_add(struct route_entry *re,
						     struct in_addr *ipv4,
						     struct in_addr *src,
						     ifindex_t ifindex,
						     vrf_id_t nh_vrf_id)
{
	struct nexthop *nh = nexthop_new();
	struct interface *oif;

	nh->vrf_id = nh_vrf_id;
	nh->type = NEXTHOP_TYPE_IPV4_IFINDEX;
	nh->gate.ipv4 = *ipv4;
	if (src)
		nh->src.ipv4 = *src;
	nh->ifindex = ifindex;

	oif = if_lookup_by_index(nh->ifindex, nh_vrf_id);

	/*
	 * Pending: need to think if a NULL interface here is ok during
	 * bootup? There was a crash because the lookup returned NULL.
	 *
	 * NEXTHOP_FLAG_ONLINK is set only when the interface exists and
	 * connected_is_unnumbered() reports it as unnumbered.
	 * NOTE(review): per the original author's note, an interface with an
	 * IP configured will not get ONLINK and the route may then not be
	 * installed as intended - confirm.
	 */
	if (oif && connected_is_unnumbered(oif))
		SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);

	route_entry_nexthop_add(re, nh);

	return nh;
}
通过上面的函数整理出路由的下一跳后,使用如下函数添加路由:
/*
 * Update or delete a prefix from the kernel, using info from a dataplane
 * context.
 *
 * The dataplane operation selects the netlink command:
 *   DPLANE_OP_ROUTE_DELETE  -> RTM_DELROUTE
 *   DPLANE_OP_ROUTE_INSTALL -> RTM_NEWROUTE
 *   DPLANE_OP_ROUTE_UPDATE  -> RTM_NEWROUTE (preceded by an explicit delete
 *                              for non-IPv4 prefixes when v6_rr_semantics
 *                              is off)
 * Any other operation fails immediately.
 *
 * Returns ZEBRA_DPLANE_REQUEST_SUCCESS when the netlink call returns 0 (or is
 * skipped because the route type is a system route), otherwise
 * ZEBRA_DPLANE_REQUEST_FAILURE.
 */
enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
{
	const struct prefix *p = dplane_ctx_get_dest(ctx);
	const int op = dplane_ctx_get_op(ctx);
	struct nexthop *nexthop;
	int cmd;
	int ret = 0;

	if (op == DPLANE_OP_ROUTE_DELETE)
		cmd = RTM_DELROUTE;
	else if (op == DPLANE_OP_ROUTE_INSTALL || op == DPLANE_OP_ROUTE_UPDATE)
		cmd = RTM_NEWROUTE;
	else
		return ZEBRA_DPLANE_REQUEST_FAILURE;

	/*
	 * IPv4, or IPv6 with replace semantics, is a single 'replace'
	 * operation. Otherwise v6 route replace semantics are not available
	 * in the kernel, so do a delete and then an add. The result of that
	 * delete is intentionally ignored. The delete is skipped when the old
	 * route type is a system route.
	 */
	if (op == DPLANE_OP_ROUTE_UPDATE && p->family != AF_INET
	    && !v6_rr_semantics
	    && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
		(void)netlink_route_multipath(RTM_DELROUTE, ctx);

	/* System routes are not sent over netlink; ret stays 0 for them. */
	if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
		ret = netlink_route_multipath(cmd, ctx);

	if (ret != 0)
		return ZEBRA_DPLANE_REQUEST_FAILURE;

	if (cmd == RTM_NEWROUTE) {
		/* Mark every active, non-recursive nexthop as installed
		 * in the FIB.
		 */
		for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
			if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE)
			    && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
				SET_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB);
		}
	}

	return ZEBRA_DPLANE_REQUEST_SUCCESS;
}
可以使用如下命令达到同样的效果:
sudo ip route add 192.168.1.0/24 via 10.200.200.1 dev br100 proto bgp metric 20 onlink
#注意onlink属性一定要添加,表示下一跳是直连的邻居,这一点从上面的代码中可以看出(设置了NEXTHOP_FLAG_ONLINK标志)
提取路由mac和下一跳ip构建邻居(这个邻居比较特殊,其中mac是overlay的mac,而IP是underlay的IP),在linux内核中添加邻居表项,且设置了noarp属性。
/*
 * Add a neighbor entry to the kernel in the NUD_NOARP state.
 *
 * Forwards its arguments to netlink_neigh_update2() with the RTM_NEWNEIGH
 * command and returns that call's result.
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
		     struct ethaddr *mac, uint8_t flags)
{
	const int rc = netlink_neigh_update2(ifp, ip, mac, flags, NUD_NOARP,
					     RTM_NEWNEIGH);

	return rc;
}
可以使用ip monitor命令监听到这一过程:
10.200.200.1 dev br100 lladdr 02:00:0a:de:de:01 NOARP
可以使用命令sudo ip neigh add 10.200.200.1 dev br100 lladdr 02:00:0a:de:de:01 nud noarp vrf evpn-vrf
达到相同的结果。
同时使用rmac和下一跳IP构建fdb表项:
/*
 * Add a MAC entry pointing at a remote VTEP to the kernel.
 *
 * Forwards its arguments to netlink_macfdb_update() with the RTM_NEWNEIGH
 * command and returns that call's result.
 */
int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
		   struct in_addr vtep_ip, bool sticky)
{
	const int rc = netlink_macfdb_update(ifp, vid, mac, vtep_ip,
					     RTM_NEWNEIGH, sticky);

	return rc;
}
可以使用如下命令得到相同的效果:
sudo bridge fdb add 02:00:0a:de:de:01 dev vxlan100 dst 10.200.200.1 self extern_learn
调用栈为:
#0 zebra_vxlan_evpn_vrf_route_add (vrf_id=11, rmac=0x7fff76e7cba0, vtep_ip=0x7fff76e7cacc, host_prefix=0x7fff76e7caf0) at zebra/zebra_vxlan.c:5680
#1 0x0000557f9485a716 in zread_route_add (client=0x557f96929790, hdr=<optimized out>, msg=<optimized out>, zvrf=<optimized out>) at zebra/zapi_msg.c:1488
#2 0x0000557f9485cebb in zserv_handle_commands (client=client@entry=0x557f96929790, msg=msg@entry=0x7ff374001040) at zebra/zapi_msg.c:2532
#3 0x0000557f9485714e in zserv_process_messages (thread=<optimized out>) at zebra/zserv.c:523
#4 0x00007ff37f3ef968 in thread_call (thread=thread@entry=0x7fff76e7e910) at lib/thread.c:1547
#5 0x00007ff37f3cc257 in frr_run (master=0x557f9672baa0) at lib/libfrr.c:1021
#6 0x0000557f9481b1be in main (argc=2, argv=0x7fff76e7ecd8) at zebra/main.c:475
(gdb) s
TYPE4 & TYPE1
TYPE4用于MULTIHOMING,暂时了解不多。
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。