TYPE-2

添加头端复制表项,这种很少出现,一般来说,对端会发送type3类型的路由用于vtep发现

/*
 * Program a remote VTEP into the kernel, but only when head-end
 * replication applies: is_vxlan_flooding_head_end() must hold and the
 * VTEP's flood_control must be VXLAN_FLOOD_HEAD_END_REPL.
 *
 * Returns the result of kernel_add_vtep() when an entry is programmed,
 * or 0 when nothing has to be installed.
 */
static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep)
{
    if (!is_vxlan_flooding_head_end())
        return 0;

    if (zvtep->flood_control != VXLAN_FLOOD_HEAD_END_REPL)
        return 0;

    /* Add the head-end replication entry to the kernel. */
    return kernel_add_vtep(zvni->vni, zvni->vxlan_if, &zvtep->vtep_ip);
}

添加mac表项(用于同子网转发)

/*
 * Push a remote MAC entry down to the kernel.
 *
 * MACs that do not carry ZEBRA_MAC_REMOTE are ignored and 0 is returned.
 * Returns -1 when the VNI's VxLAN interface has no zebra interface info
 * attached; otherwise returns whatever kernel_add_mac() reports.
 */
static int zvni_mac_install(zebra_vni_t *zvni, zebra_mac_t *mac)
{
    struct zebra_if *vxlan_zif;
    bool is_sticky;

    if ((mac->flags & ZEBRA_MAC_REMOTE) == 0)
        return 0;

    vxlan_zif = zvni->vxlan_if->info;
    if (!vxlan_zif)
        return -1;

    /*
     * Either the sticky flag or the remote default-gateway flag makes
     * the entry get installed as sticky.
     */
    is_sticky = (CHECK_FLAG(mac->flags,
                (ZEBRA_MAC_STICKY | ZEBRA_MAC_REMOTE_DEF_GW))) != 0;

    return kernel_add_mac(zvni->vxlan_if,
                  vxlan_zif->l2info.vxl.access_vlan,
                  &mac->macaddr,
                  mac->fwd_info.r_vtep_ip,
                  is_sticky);
}

添加邻居表项(跨子网报文转发时,用作内层目的mac)

/*
 * Push a remote neighbor entry down to the kernel.
 *
 * Neighbors that do not carry ZEBRA_NEIGH_REMOTE are ignored and 0 is
 * returned. Returns -1 when the VNI's VxLAN interface has no zebra
 * interface info, or when zvni_map_to_svi() finds no interface for the
 * access VLAN / bridge pair. The kernel is only programmed when
 * GNU_LINUX is defined; on other builds the function returns 0 after
 * the lookups succeed.
 */
static int zvni_neigh_install(zebra_vni_t *zvni, zebra_neigh_t *n)
{
    struct zebra_if *vxlan_zif;
    struct interface *svi_if;
    int rc = 0;

    if ((n->flags & ZEBRA_NEIGH_REMOTE) == 0)
        return 0;

    vxlan_zif = zvni->vxlan_if->info;
    if (!vxlan_zif)
        return -1;

    svi_if = zvni_map_to_svi(vxlan_zif->l2info.vxl.access_vlan,
                 vxlan_zif->brslave_info.br_if);
    if (!svi_if)
        return -1;

#ifdef GNU_LINUX
    {
        /* Always externally learned; add the router bit when set. */
        uint8_t ntf_flags = NTF_EXT_LEARNED;

        if (n->flags & ZEBRA_NEIGH_ROUTER_FLAG)
            ntf_flags |= NTF_ROUTER;

        /* The neighbor is marked active before the kernel call. */
        ZEBRA_NEIGH_SET_ACTIVE(n);
        rc = kernel_add_neigh(svi_if, &n->ip, &n->emac, ntf_flags);
    }
#endif
    return rc;
}
/*
 * Add a neighbor entry in the NUD_NOARP state by issuing an
 * RTM_NEWNEIGH request through netlink_neigh_update2().
 *
 * Returns the value reported by netlink_neigh_update2().
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
             struct ethaddr *mac, uint8_t flags)
{
    int rc;

    rc = netlink_neigh_update2(ifp, ip, mac, flags,
                   NUD_NOARP, RTM_NEWNEIGH);
    return rc;
}

不需要额外添加路由:创建bdif时,该bdif作为本l2vni的网关,在其上配置IP后会自动生成本网段的网段路由,结合上面的邻居表项即可完成跨子网路由转发。

注:对于集中式路由网关,如果设置了default-gw标志,发布的本地mac/ip消息在设置邻居表时标志为NUD_NOARP。携带sticky标志的也是这种类型的邻居,其它的则是NTF_EXT_LEARNED表项。

TYPE-3

添加mac值为全零的头端复制fdb表项

/*
 * Program a remote VTEP into the kernel, but only when head-end
 * replication applies: is_vxlan_flooding_head_end() must hold and the
 * VTEP's flood_control must be VXLAN_FLOOD_HEAD_END_REPL.
 *
 * Returns the result of kernel_add_vtep() when an entry is programmed,
 * or 0 when nothing has to be installed.
 */
static int zvni_vtep_install(zebra_vni_t *zvni, zebra_vtep_t *zvtep)
{
    if (!is_vxlan_flooding_head_end())
        return 0;

    if (zvtep->flood_control != VXLAN_FLOOD_HEAD_END_REPL)
        return 0;

    /* Add the head-end replication entry to the kernel. */
    return kernel_add_vtep(zvni->vni, zvni->vxlan_if, &zvtep->vtep_ip);
}

TYPE-5

FRR-BGP对于网段路由采用的是interface-less模型,如下图所示:
image.png
在linux内核中是如下配置:

右边的VTEP的IP为10.200.200.1(underlay-ip),其路由mac为0200.0ade.de01(这个是overlay的mac,通常作为内层报文的mac)。当右边的设备发布一条192.168.1.0/24的网段路由的时候,左边的BGP将会收到如下所示的type-5类型的路由:
image.png
可以看到其NLRI中的前缀为192.168.1.0/24,下一跳属性为10.200.200.1(是一个underlay地址)。同时使用路由器MAC扩展团体属性(Router's MAC Extended Community)携带了overlay网关的mac(0200.0ade.de01),还携带了l3vni。左边的设备收到该路由后会进行处理。

在指定vrf中安装路由

/*
 * Create an IPv4 gateway + ifindex nexthop and attach it to a route
 * entry.
 *
 * re        - route entry that receives the new nexthop
 * ipv4      - gateway address, copied into the nexthop
 * src       - optional source address; copied only when non-NULL
 * ifindex   - outgoing interface index stored on the nexthop
 * nh_vrf_id - VRF id stored on the nexthop and used for the interface
 *             lookup
 *
 * Returns the newly created nexthop.
 */
struct nexthop *route_entry_nexthop_ipv4_ifindex_add(struct route_entry *re,
                             struct in_addr *ipv4,
                             struct in_addr *src,
                             ifindex_t ifindex,
                             vrf_id_t nh_vrf_id)
{
    struct nexthop *nh = nexthop_new();
    struct interface *out_if;

    nh->type = NEXTHOP_TYPE_IPV4_IFINDEX;
    nh->vrf_id = nh_vrf_id;
    nh->ifindex = ifindex;
    nh->gate.ipv4 = *ipv4;
    if (src)
        nh->src.ipv4 = *src;

    out_if = if_lookup_by_index(nh->ifindex, nh_vrf_id);
    /*
     * Pending: need to think if a NULL interface here is ok during
     * bootup? There was a crash because the lookup result was NULL.
     *
     * NEXTHOP_FLAG_ONLINK is set only when the interface exists and
     * connected_is_unnumbered() reports true for it.
     * NOTE(review): per the original annotation, the interface must
     * have no IP address configured, otherwise the route is not
     * installed correctly - confirm against connected_is_unnumbered().
     */
    if (out_if && connected_is_unnumbered(out_if))
        SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);

    route_entry_nexthop_add(re, nh);

    return nh;
}

通过上面的函数整理出路由的下一跳后,使用如下函数添加路由:

/*
 * Update or delete a prefix from the kernel,
 * using info from a dataplane context.
 *
 * The dataplane operation selects the netlink command: a delete maps to
 * RTM_DELROUTE, an install or update maps to RTM_NEWROUTE, and any other
 * operation fails immediately. Returns ZEBRA_DPLANE_REQUEST_SUCCESS when
 * the netlink call (if one was needed) returned 0, otherwise
 * ZEBRA_DPLANE_REQUEST_FAILURE.
 */
enum zebra_dplane_result kernel_route_update(struct zebra_dplane_ctx *ctx)
{
    int cmd, ret;
    const struct prefix *p = dplane_ctx_get_dest(ctx);
    struct nexthop *nh;

    switch (dplane_ctx_get_op(ctx)) {
    case DPLANE_OP_ROUTE_DELETE:
        cmd = RTM_DELROUTE;
        break;

    case DPLANE_OP_ROUTE_INSTALL:
        cmd = RTM_NEWROUTE;
        break;

    case DPLANE_OP_ROUTE_UPDATE:
        /*
         * IPv4, or IPv6 with v6_rr_semantics, is a single 'replace'
         * operation.
         *
         * Otherwise: v6 route replace semantics are not in the
         * kernel at this point as I understand it, so let's do a
         * delete then an add. In the future once v6 route replace
         * semantics are in we can figure out what to do here to
         * allow working with old and new kernels.
         *
         * I'm also intentionally ignoring the failure case of the
         * route delete. If that happens yeah we're screwed.
         */
        if (p->family != AF_INET && !v6_rr_semantics
            && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
            (void)netlink_route_multipath(RTM_DELROUTE, ctx);
        cmd = RTM_NEWROUTE;
        break;

    default:
        return ZEBRA_DPLANE_REQUEST_FAILURE;
    }

    /* Routes matched by RSYSTEM_ROUTE() are not sent to netlink. */
    ret = 0;
    if (!RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
        ret = netlink_route_multipath(cmd, ctx);

    if (cmd == RTM_NEWROUTE && ret == 0) {
        /* Update installed nexthops to signal which have been
         * installed: every active, non-recursive nexthop gets
         * NEXTHOP_FLAG_FIB.
         */
        for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nh)) {
            if (!CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)
                && CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE)) {
                SET_FLAG(nh->flags, NEXTHOP_FLAG_FIB);
            }
        }
    }

    if (ret != 0)
        return ZEBRA_DPLANE_REQUEST_FAILURE;

    return ZEBRA_DPLANE_REQUEST_SUCCESS;
}

可以使用如下命令达到同样的效果:

sudo ip route add 192.168.1.0/24 via 10.200.200.1 dev br100 proto bgp metric 20 onlink 
#注意onlink属性一定要添加,表示下一跳是直连的邻居,从上面的代码可以看出

提取路由mac和下一跳ip构建邻居(这个邻居比较特殊,其中mac是overlay的mac,而IP是underlay的IP),在linux内核中添加邻居表项,且设置了noarp属性。

/*
 * Add a neighbor entry in the NUD_NOARP state by issuing an
 * RTM_NEWNEIGH request through netlink_neigh_update2().
 *
 * Returns the value reported by netlink_neigh_update2().
 */
int kernel_add_neigh(struct interface *ifp, struct ipaddr *ip,
             struct ethaddr *mac, uint8_t flags)
{
    int rc;

    rc = netlink_neigh_update2(ifp, ip, mac, flags,
                   NUD_NOARP, RTM_NEWNEIGH);
    return rc;
}

可以使用ip monitor命令监听到这一过程:

10.200.200.1 dev br100 lladdr 02:00:0a:de:de:01 NOARP

可以使用命令sudo ip neigh add 10.200.200.1 dev br100 lladdr 02:00:0a:de:de:01 nud noarp vrf evpn-vrf
达到相同的结果。

同时使用rmac和下一跳IP构建fdb表项:

/*
 * Add a MAC forwarding entry that points at a remote VTEP by issuing an
 * RTM_NEWNEIGH request through netlink_macfdb_update().
 *
 * Returns the value reported by netlink_macfdb_update().
 */
int kernel_add_mac(struct interface *ifp, vlanid_t vid, struct ethaddr *mac,
           struct in_addr vtep_ip, bool sticky)
{
    int rc;

    rc = netlink_macfdb_update(ifp, vid, mac, vtep_ip, RTM_NEWNEIGH,
                   sticky);
    return rc;
}

可以使用如下命令得到相同的效果:

sudo bridge fdb add 02:00:0a:de:de:01 dev vxlan100 dst 10.200.200.1 self extern_learn

调用栈为:

#0  zebra_vxlan_evpn_vrf_route_add (vrf_id=11, rmac=0x7fff76e7cba0, vtep_ip=0x7fff76e7cacc, host_prefix=0x7fff76e7caf0) at zebra/zebra_vxlan.c:5680
#1  0x0000557f9485a716 in zread_route_add (client=0x557f96929790, hdr=<optimized out>, msg=<optimized out>, zvrf=<optimized out>) at zebra/zapi_msg.c:1488
#2  0x0000557f9485cebb in zserv_handle_commands (client=client@entry=0x557f96929790, msg=msg@entry=0x7ff374001040) at zebra/zapi_msg.c:2532
#3  0x0000557f9485714e in zserv_process_messages (thread=<optimized out>) at zebra/zserv.c:523
#4  0x00007ff37f3ef968 in thread_call (thread=thread@entry=0x7fff76e7e910) at lib/thread.c:1547
#5  0x00007ff37f3cc257 in frr_run (master=0x557f9672baa0) at lib/libfrr.c:1021
#6  0x0000557f9481b1be in main (argc=2, argv=0x7fff76e7ecd8) at zebra/main.c:475
(gdb) s

TYPE4 & TYPE1

TYPE4用于MULTIHOMING,暂时了解不多。


ouyangxibao
189 声望162 粉丝

不生产代码,只是代码的搬运工