VPP接口层分析

1

VPP接口层分析

​ 接口层是硬件驱动和上层软件之间一层抽象代码,屏蔽硬件的差异,为上层软件提供一些统一的操作接口。上层软件调用接口层的操作进行报文的读入与发出,同时可以进行硬件设备的设置以及相关信息(比如统计数据)的读取。

vpp支持多种驱动类型的网络设备,比如dpdk、netmap、af_packet等。使用同一种驱动的物理设备可以有多个,比如一台机器上可以有多个dpdk接口,因此vpp抽象出了链路层接口hw_interface。网络中还有很多虚拟设备,它们依附于物理设备,例如同一个hw_interface上可以有4096个vlan虚拟子接口。为了描述这些虚拟接口,vpp在hw_interface的基础上又封装了一层sw_interface来表示接口。sw_interface是整个接口层对上层软件的抽象,上层软件使用sw_interface索引来表示具体操作的设备。

VPP支持的物理网络设备类型

一类设备表示使用相同硬件驱动的设备,比如dpdk类型的以太网设备,af_packet类型的虚拟以太网设备,netmap以太网设备等,属于物理层的描述。

设备类描述结构

/* A class of hardware interface devices. */
/* The set of operation functions for one class of hardware interface; these
   functions form a layer wrapped over the hardware device driver. */
typedef struct _vnet_device_class
{
    /* Index into main vector (the class index). */
    u32 index;

    /* Device name (e.g. "FOOBAR 1234a"); the name of the device class. */
    char *name;

    /* Function to call when hardware interface is added/deleted. */
    /* Adds/deletes one instance of a device of this class. */
    vnet_interface_function_t *interface_add_del_function;

    /* Function to bring device administratively up/down. */
    vnet_interface_function_t *admin_up_down_function;

    /* Function to call when sub-interface is added/deleted */
    /* Adds/deletes a sub-interface whose parent is a device of this class. */
    vnet_subif_add_del_function_t *subif_add_del_function;

    /* Function to call interface rx mode is changed */
    vnet_interface_set_rx_mode_function_t *rx_mode_change_function;

    /* Function to call interface l2 mode is changed */
    vnet_interface_set_l2_mode_function_t *set_l2_mode_function;

    /* Redistribute flag changes/existence of this interface class. */
    u32 redistribute;

    /* Transmit function. */
    vlib_node_function_t *tx_function;

    /* Transmit function candidate registration with priority */
    /* List of registered candidate transmit functions; the candidate with
       the highest priority is eventually assigned to tx_function. */
    vlib_node_fn_registration_t *tx_fn_registrations;

    /* Error strings indexed by error code for this node. */
    /* Array of error-reason strings for the transmit function. */
    char **tx_function_error_strings;

    /* Number of error codes used by this node. */
    /* Size of the tx_function_error_strings array. */
    u32 tx_function_n_errors;

    /* Renumber device name [only!] support, a control-plane kludge */
    int (*name_renumber) (struct vnet_hw_interface_t * hi,
                          u32 new_dev_instance);

    /* Interface flow offload operations */
    vnet_flow_dev_ops_function_t *flow_ops_function;

    /* Format device instance as name. */
    format_function_t *format_device_name;

    /* Parse function for device name. */
    unformat_function_t *unformat_device_name;

    /* Format device verbosely for this class. */
    format_function_t *format_device;

    /* Trace buffer format for TX function. */
    format_function_t *format_tx_trace;

    /* Format flow offload entry */
    format_function_t *format_flow;

    /* Function to clear hardware counters for device. */
    void (*clear_counters) (u32 dev_class_instance);

    uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
                                           u32 hw_if_index,
                                           u32 hw_class_index);

    /* Called when hardware class of an interface changes. */
    void (*hw_class_change) (struct vnet_main_t * vnm,
                             u32 hw_if_index, u32 new_hw_class_index);

    /* Called to redirect traffic from a specific interface instance */
    /* Forces the traffic of one interface to be redirected to the given node. */
    void (*rx_redirect_to_node) (struct vnet_main_t * vnm,
                                 u32 hw_if_index, u32 node_index);

    /* Link-list of all device classes set up by constructors created below */
    struct _vnet_device_class *next_class_registration;

    /* Function to set mac address. */
    /* Called when the link-layer address changes. */
    vnet_interface_set_mac_address_function_t *mac_addr_change_function;
} vnet_device_class_t;

设备类输出函数注册结构

该结构与node的功能函数注册结构是一样的

/* One candidate implementation of a node function together with its
   priority. Candidates are chained into a singly linked list; for device
   classes the candidate with the highest priority becomes the tx_function. */
typedef struct _vlib_node_fn_registration
{
    /* The candidate function. */
    vlib_node_function_t *function;
    /* Priority of this candidate; a larger value wins. */
    int priority;
    /* Next candidate in the list. */
    struct _vlib_node_fn_registration *next_registration;
    char *name;
} vlib_node_fn_registration_t;

注册宏

/*
 * VNET_DEVICE_CLASS (x, ...) declares the device class object x and emits
 * two static functions for it:
 *  - a constructor that pushes x onto the head of
 *    vnm->device_class_registrations;
 *  - a destructor that removes x from that list again.
 * The macro ends with a second declaration of x so that the user can attach
 * an initializer ("= { ... };") to the macro invocation.
 */
#ifndef CLIB_MARCH_VARIANT /* macro registering/unregistering a device class (its set of operation functions) */
#define VNET_DEVICE_CLASS(x,...)                                        \
  __VA_ARGS__ vnet_device_class_t x;                                    \
static void __vnet_add_device_class_registration_##x (void)             \
    __attribute__((__constructor__)) ;                                  \
static void __vnet_add_device_class_registration_##x (void)             \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    x.next_class_registration = vnm->device_class_registrations;        \
    vnm->device_class_registrations = &x;                               \
}                                                                       \
static void __vnet_rm_device_class_registration_##x (void)              \
    __attribute__((__destructor__)) ;                                   \
static void __vnet_rm_device_class_registration_##x (void)              \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    VLIB_REMOVE_FROM_LINKED_LIST (vnm->device_class_registrations,      \
                                  &x, next_class_registration);         \
}                                                                       \
__VA_ARGS__ vnet_device_class_t x
#else
/* create unused pointer to silence compiler warnings and get whole
   function optimized out */
/* When CLIB_MARCH_VARIANT is defined nothing is registered: the macro only
   declares a static, unused object named __clib_unused_<x>. */
#define VNET_DEVICE_CLASS(x,...)                                        \
static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
/*
 * Macro that registers a transmit (output) function candidate for a device
 * class. It
 *  - declares the function <devclass>_tx_fn (name passed through
 *    CLIB_MARCH_SFX);
 *  - defines a static vlib_node_fn_registration_t pointing at it;
 *  - emits a constructor that stores CLIB_MARCH_FN_PRIORITY() as the
 *    priority and pushes the registration onto the head of
 *    devclass.tx_fn_registrations;
 *  - ends with the function header, so the function body follows the macro
 *    invocation.
 */
#define VNET_DEVICE_CLASS_TX_FN(devclass)                \
uword CLIB_MARCH_SFX (devclass##_tx_fn)();                \
static vlib_node_fn_registration_t                    \
  CLIB_MARCH_SFX(devclass##_tx_fn_registration) =            \
  { .function = &CLIB_MARCH_SFX (devclass##_tx_fn), };            \
                                    \
static void __clib_constructor                        \
CLIB_MARCH_SFX (devclass##_tx_fn_multiarch_register) (void)        \
{                                    \
  extern vnet_device_class_t devclass;                    \
  vlib_node_fn_registration_t *r;                    \
  r = &CLIB_MARCH_SFX (devclass##_tx_fn_registration);            \
  r->priority = CLIB_MARCH_FN_PRIORITY();                \
  r->next_registration = devclass.tx_fn_registrations;            \
  devclass.tx_fn_registrations = r;                    \
}                                    \
uword CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (devclass##_tx_fn)

实例

/* Example registration: the device class for dpdk devices.
   No .tx_function is set in this initializer.
   NOTE(review): presumably the transmit function is supplied through
   VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) elsewhere - confirm. */
/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) =
{
    .name = "dpdk",
    .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
    .tx_function_error_strings = dpdk_tx_func_error_strings,
    .format_device_name = format_dpdk_device_name,
    .format_device = format_dpdk_device,
    .format_tx_trace = format_dpdk_tx_trace,
    .clear_counters = dpdk_clear_hw_interface_counters,
    .admin_up_down_function = dpdk_interface_admin_up_down,
    .subif_add_del_function = dpdk_subif_add_del_function,
    .rx_redirect_to_node = dpdk_set_interface_next_node,
    .mac_addr_change_function = dpdk_set_mac_address,
    .format_flow = format_dpdk_flow,
    .flow_ops_function = dpdk_flow_ops_fn,
};
/* *INDENT-ON* */

链路层interface类型

在物理设备之上,根据接口的功能进一步抽象出一层硬件接口类型,它在OSI七层模型中属于链路层,比如ethernet设备、bond设备、vlan设备等。

interface类型描述结构

/* Layer-2 (e.g. Ethernet) interface class. */
/* Describes one link-layer interface type; instances are chained through
   next_class_registration by the registration constructors. */
typedef struct _vnet_hw_interface_class
{
    /* Index into main vector. */
    u32 index;

    /* Class name (e.g. "Ethernet"). */
    char *name;

    /* Flags. Per the original annotation there is currently a single flag,
       telling whether this is a point-to-point device; point-to-point
       devices do not need neighbour information. */
    vnet_hw_interface_class_flags_t flags;

    /* Function to call when hardware interface is added/deleted. */
    vnet_interface_function_t *interface_add_del_function;

    /* Function to bring interface administratively up/down. */
    vnet_interface_function_t *admin_up_down_function;

    /* Function to call when link state changes. */
    vnet_interface_function_t *link_up_down_function;

    /* Function to call when link MAC changes. */
    vnet_interface_set_mac_address_function_t *mac_addr_change_function;

    /* Format function to display interface name. */
    format_function_t *format_interface_name;

    /* Format function to display interface address. */
    format_function_t *format_address;

    /* Format packet header for this interface class. */
    format_function_t *format_header;

    /* Format device verbosely for this class. */
    format_function_t *format_device;

    /* Parser for hardware (e.g. ethernet) address. */
    unformat_function_t *unformat_hw_address;

    /* Parser for packet header for e.g. rewrite string. */
    unformat_function_t *unformat_header;

    /* Builds a rewrite string for the interface to the destination
     * for the payload/link type. */
    u8 *(*build_rewrite) (struct vnet_main_t * vnm,
                          u32 sw_if_index,
                          vnet_link_t link_type, const void *dst_hw_address);

    /* Update an adjacency added by FIB (as opposed to via the
     * neighbour resolution protocol). */
    void (*update_adjacency) (struct vnet_main_t * vnm,
                              u32 sw_if_index, u32 adj_index);

    uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
                                           u32 hw_if_index,
                                           u32 hw_class_index);

    /* Called when hw interface class is changed and old hardware instance
       may want to be deleted. */
    void (*hw_class_change) (struct vnet_main_t * vnm, u32 hw_if_index,
                             u32 old_class_index, u32 new_class_index);

    /* List of hw interface classes, built by constructors */
    struct _vnet_hw_interface_class *next_class_registration;

} vnet_hw_interface_class_t;

注册宏

/**
 * @brief Default adjacency update function
 */
extern void default_update_adjacency (struct vnet_main_t *vnm,
                                      u32 sw_if_index, u32 adj_index);

/*
 * VNET_HW_INTERFACE_CLASS (x, ...) declares the hardware interface class
 * object x and emits two static functions for it:
 *  - a constructor that pushes x onto the head of
 *    vnm->hw_interface_class_registrations;
 *  - a destructor that removes x from that list again.
 * The trailing declaration of x lets the user attach an initializer to the
 * macro invocation.
 */
#define VNET_HW_INTERFACE_CLASS(x,...)                                  \
  __VA_ARGS__ vnet_hw_interface_class_t x;                              \
static void __vnet_add_hw_interface_class_registration_##x (void)       \
    __attribute__((__constructor__)) ;                                  \
static void __vnet_add_hw_interface_class_registration_##x (void)       \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    x.next_class_registration = vnm->hw_interface_class_registrations;  \
    vnm->hw_interface_class_registrations = &x;                         \
}                                                                       \
static void __vnet_rm_hw_interface_class_registration_##x (void)        \
    __attribute__((__destructor__)) ;                                   \
static void __vnet_rm_hw_interface_class_registration_##x (void)        \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    VLIB_REMOVE_FROM_LINKED_LIST (vnm->hw_interface_class_registrations,\
                                  &x, next_class_registration);         \
}                                                                       \
__VA_ARGS__ vnet_hw_interface_class_t x

实例

/* Example registration: the "Ethernet" hardware interface class. */
/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) =
{
    .name = "Ethernet",
    .format_address = format_ethernet_address,
    .format_header = format_ethernet_header_with_length,
    .unformat_hw_address = unformat_ethernet_address,
    .unformat_header = unformat_ethernet_header,
    .build_rewrite = ethernet_build_rewrite,
    .update_adjacency = ethernet_update_adjacency,
    .mac_addr_change_function = ethernet_mac_change,
};
/* *INDENT-ON* */

软件层interface

/* Flag bits of a software interface; each enumerator is a distinct bit so
   that several flags can be OR-ed together. The packed attribute asks the
   compiler for the smallest integer type able to hold the values. */
typedef enum vnet_sw_interface_flags_t_
{
    VNET_SW_INTERFACE_FLAG_NONE = 0,
    /* Interface is "up" meaning administratively up.
       Up in the sense of link state being up is maintained by hardware interface. */
    VNET_SW_INTERFACE_FLAG_ADMIN_UP = (1 << 0),

    /* Interface is disabled for forwarding: punt all traffic to slow-path. */
    VNET_SW_INTERFACE_FLAG_PUNT = (1 << 1),

    VNET_SW_INTERFACE_FLAG_PROXY_ARP = (1 << 2),

    VNET_SW_INTERFACE_FLAG_UNNUMBERED = (1 << 3),

    VNET_SW_INTERFACE_FLAG_BOND_SLAVE = (1 << 4),

    /* Interface does not appear in CLI/API */
    VNET_SW_INTERFACE_FLAG_HIDDEN = (1 << 5),

    /* Interface in ERROR state */
    VNET_SW_INTERFACE_FLAG_ERROR = (1 << 6),

    /* Interface has IP configured directed broadcast */
    VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST = (1 << 7),

} __attribute__ ((packed)) vnet_sw_interface_flags_t;

/* Software-interface.  This corresponds to a Ethernet VLAN, ATM vc, a
   tunnel, etc.  Configuration (e.g. IP address) gets attached to
   software interface. */
typedef struct
{
    /* Interface type, stored in a 16-bit bit-field. */
    vnet_sw_interface_type_t type:16;
    /* Interface flags (VNET_SW_INTERFACE_FLAG_* bits). */
    vnet_sw_interface_flags_t flags;

    /* Index for this interface. */
    u32 sw_if_index;

    /* Software interface index of super-interface;
       equal to sw_if_index if this interface is not a
       sub-interface. */
    u32 sup_sw_if_index;

    /* this swif is unnumbered, use addresses on unnumbered_sw_if_index... */
    u32 unnumbered_sw_if_index;

    /* VNET_SW_INTERFACE_TYPE_HARDWARE. */
    u32 hw_if_index;

    /* MTU for network layer (not including L2 headers) */
    u32 mtu[VNET_N_MTU];

    /* VNET_SW_INTERFACE_TYPE_SUB. */
    /* Holds the sub-interface information when this interface is a
       sub-interface. */
    vnet_sub_interface_t sub;

    /* VNET_SW_INTERFACE_TYPE_P2P. */
    vnet_p2p_sub_interface_t p2p;

    vnet_flood_class_t flood_class;
} vnet_sw_interface_t;

子接口

/* Kind of a software interface; stored in vnet_sw_interface_t.type. */
typedef enum
{
    /* A hw interface. */
    VNET_SW_INTERFACE_TYPE_HARDWARE,

    /* A sub-interface. */
    VNET_SW_INTERFACE_TYPE_SUB,
    VNET_SW_INTERFACE_TYPE_P2P,
    VNET_SW_INTERFACE_TYPE_PIPE,
} vnet_sw_interface_type_t;

/* Description of a sub-interface: its id under the parent interface and the
   Ethernet VLAN classification data used to match packets to it. */
typedef struct
{
    /*
     * Subinterface ID. A number 0-N to uniquely identify
     * this subinterface under the main (parent?) interface
     */
    u32 id;

    /* Classification data. Used to associate packet header with subinterface. */
    struct
    {
        u16 outer_vlan_id;
        u16 inner_vlan_id;
        /* The same 16 bits can be accessed either as a whole (raw_flags) or
           as the individual one-bit fields below. */
        union
        {
            u16 raw_flags;
            struct
            {
                u16 no_tags:1;
                u16 one_tag:1;
                u16 two_tags:1;
                u16 dot1ad:1;        /* 0 = dot1q, 1=dot1ad */
                u16 exact_match:1;
                u16 default_sub:1;
                u16 outer_vlan_id_any:1;
                u16 inner_vlan_id_any:1;
            } flags;
        };
    } eth;
} vnet_sub_interface_t;

接口层初始化过程

​ 前面所述的设备和接口注册信息,是在main函数之前进行注册的,main函数需要对这些信息进行整理加工。在vpp中vnet_main_t中的interface_main成员维护了接口相关的信息。

接口管理结构

/* Global interface-management structure (the interface_main member of
   vnet_main_t). */
typedef struct
{
    /* Hardware interfaces: every link-layer interface lives in this pool. */
    vnet_hw_interface_t *hw_interfaces;

    /* Hash table mapping HW (link-layer) interface name to index. */
    uword *hw_interface_by_name;

    /* Vectors of hardware interface classes and device classes. */
    /* Statically registered link-layer interface classes. */
    vnet_hw_interface_class_t *hw_interface_classes;
    /* Statically registered physical-layer device classes. */
    vnet_device_class_t *device_classes;

    /* Hash table mapping name to hw interface/device class. */
    /* Link-layer interface class name -> class index. */
    uword *hw_interface_class_by_name;
    /* Device class name -> class index. */
    uword *device_class_by_name;

    /* Software interfaces. */
    vnet_sw_interface_t *sw_interfaces;

    /* Hash table mapping sub intfc sw_if_index by sup sw_if_index and sub id */
    uword *sw_if_index_by_sup_and_sub;

    /* Software interface counters both simple and combined
     * packet and byte counters, together with the lock that protects them.
     */
    volatile u32 *sw_if_counter_lock;
    vlib_simple_counter_main_t *sw_if_counters;
    vlib_combined_counter_main_t *combined_sw_if_counters;

    /* Output/tx nodes of deleted hardware interfaces, kept so that a later
       interface registration can re-use them. */
    vnet_hw_interface_nodes_t *deleted_hw_interface_nodes;

    /* pcap drop tracing */
    int drop_pcap_enable;
    pcap_main_t pcap_main;
    u8 *pcap_filename;
    u32 pcap_sw_if_index;
    u32 pcap_pkts_to_capture;
    uword *pcap_drop_filter_hash;

    /* per-thread data */
    vnet_interface_per_thread_data_t *per_thread_data;

    /* enable GSO processing in packet path if this count is > 0 */
    u32 gso_interface_count;

    /* feature_arc_index */
    u8 output_feature_arc_index;
} vnet_interface_main_t;

接口管理初始化函数

/*
 * Interface layer initialization.
 *
 * Collects the device classes and hardware interface classes that were
 * registered by constructors before main() into the vectors and name hash
 * tables of vnm->interface_main, names the software interface counters,
 * selects the transmit function of every device class and allocates the
 * per-thread data.
 *
 * Returns 0 on success, or a clib_error_t when the buffer opaque size check
 * fails or a dependent init function reports an error.
 */
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
    vnet_main_t *vnm = vnet_get_main ();
    vnet_interface_main_t *im = &vnm->interface_main;
    vlib_buffer_t *b = 0;
    vnet_buffer_opaque_t *o = 0;
    clib_error_t *error;

    /*
     * Keep people from shooting themselves in the foot.
     * b and o are null pointers that are only used inside sizeof.
     */
    if (sizeof (b->opaque) != sizeof (vnet_buffer_opaque_t))
    {
#define _(a) if (sizeof(o->a) > sizeof (o->unused))                     \
      clib_warning                                                      \
        ("FATAL: size of opaque union subtype %s is %d (max %d)",       \
         #a, sizeof(o->a), sizeof (o->unused));
        foreach_buffer_opaque_union_subtype;
#undef _

        return clib_error_return
        (0, "FATAL: size of vlib buffer opaque %d, size of vnet opaque %d",
         sizeof (b->opaque), sizeof (vnet_buffer_opaque_t));
    }
    /* Lock for the statistics; it protects sw_if_counters. The lock word
       gets its own cache line and is held (1) while the counters are set up. */
    im->sw_if_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
            CLIB_CACHE_LINE_BYTES);
    im->sw_if_counter_lock[0] = 1;    /* should be no need */

    vec_validate (im->sw_if_counters, VNET_N_SIMPLE_INTERFACE_COUNTER - 1);
#define _(E,n,p)    /* name every simple counter and its stat segment path */ \
  im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n;        \
  im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
    foreach_simple_interface_counter_name
#undef _
    vec_validate (im->combined_sw_if_counters,
                  VNET_N_COMBINED_INTERFACE_COUNTER - 1);
#define _(E,n,p)                            \
  im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n;    \
  im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
    foreach_combined_interface_counter_name
#undef _
    /* Release the counter lock again. */
    im->sw_if_counter_lock[0] = 0;
    /* Build the table mapping a device class name to the index of its
       descriptor. */
    im->device_class_by_name = hash_create_string ( /* size */ 0,
            sizeof (uword));
    {
        vnet_device_class_t *c;
        /* Walk every registered device class. */
        c = vnm->device_class_registrations;

        while (c)
        {
            /* The index is the position the class will get in the vector. */
            c->index = vec_len (im->device_classes);
            /* Record the name -> index mapping. */
            hash_set_mem (im->device_class_by_name, c->name, c->index);

            if (c->tx_fn_registrations)
            {
                vlib_node_fn_registration_t *fnr = c->tx_fn_registrations;
                int priority = -1;

                /* to avoid confusion, please remove ".tx_function" statement
                   from VNET_DEVICE_CLASS() if using function candidates */
                ASSERT (c->tx_function == 0);
                /* Pick the transmit function with the highest priority. */
                while (fnr)
                {
                    if (fnr->priority > priority)
                    {
                        priority = fnr->priority;
                        c->tx_function = fnr->function;
                    }
                    fnr = fnr->next_registration;
                }
            }

            /* The vector receives a copy (c[0]) of the registered object. */
            vec_add1 (im->device_classes, c[0]);
            c = c->next_class_registration;
        }
    }
    /* Create the table mapping a hardware interface class name to its index. */
    im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
            sizeof (uword));

    im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64),
            sizeof (uword));
    {
        vnet_hw_interface_class_t *c;
        /* Walk every registered link-layer interface class. */
        c = vnm->hw_interface_class_registrations;

        while (c)
        {
            c->index = vec_len (im->hw_interface_classes);
            hash_set_mem (im->hw_interface_class_by_name, c->name, c->index);

            /* Classes that do not provide these callbacks get the defaults. */
            if (NULL == c->build_rewrite)
                c->build_rewrite = default_build_rewrite;
            if (NULL == c->update_adjacency)
                c->update_adjacency = default_update_adjacency;

            /* The vector receives a copy (c[0]) of the registered object. */
            vec_add1 (im->hw_interface_classes, c[0]);
            c = c->next_class_registration;
        }
    }

    im->gso_interface_count = 0;
    /* init per-thread data */
    /* NOTE(review): the index passed is vlib_num_workers (), which presumably
       yields one slot per worker plus one for the main thread - confirm. */
    vec_validate_aligned (im->per_thread_data, vlib_num_workers (),
                          CLIB_CACHE_LINE_BYTES);

    if ((error = vlib_call_init_function (vm, vnet_interface_cli_init)))
        return error;

    vnm->interface_tag_by_sw_if_index = hash_create (0, sizeof (uword));

#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
    if ((error = vlib_call_init_function (vm, trajectory_trace_init)))
        return error;
#endif

    return 0;
}

VLIB_INIT_FUNCTION (vnet_interface_init);

添加一个接口

以af_packet类型的以太网接口为例进行说明

/*
 * Create an af_packet interface on top of the host interface named by
 * host_if_name and register it as an Ethernet interface.
 *
 * Parameters:
 *   vm           - vlib main; only used to read the current time when a
 *                  random MAC address has to be generated.
 *   host_if_name - vppinfra vector holding the host interface name.
 *   hw_addr_set  - optional 6-byte MAC address; when NULL an address of the
 *                  form 02:fe:xx:xx:xx:xx is generated.
 *   sw_if_index  - optional out parameter; receives the software interface
 *                  index of the new (or of the already existing) interface.
 *
 * Returns 0 on success, VNET_API_ERROR_IF_ALREADY_EXISTS when an af_packet
 * interface already exists for the host interface,
 * VNET_API_ERROR_INVALID_INTERFACE when the name does not fit into
 * ifr_name or cannot be resolved, VNET_API_ERROR_SYSCALL_ERROR_1 when the
 * helper socket or the Ethernet registration fails, and otherwise the
 * non-zero value returned by the failing ioctl / create_packet_v2_sock call.
 */
int
af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
                     u32 * sw_if_index)
{
    af_packet_main_t *apm = &af_packet_main;
    int ret, fd = -1, fd2 = -1;
    struct tpacket_req *rx_req = 0;
    struct tpacket_req *tx_req = 0;
    struct ifreq ifr;
    u8 *ring = 0;
    af_packet_if_t *apif = 0;
    u8 hw_addr[6];
    clib_error_t *error;
    vnet_sw_interface_t *sw;
    vnet_hw_interface_t *hw;
    vlib_thread_main_t *tm = vlib_get_thread_main ();
    vnet_main_t *vnm = vnet_get_main ();
    uword *p;
    uword if_index;
    u8 *host_if_name_dup = 0;
    int host_if_index = -1;

    /* Nothing has been allocated yet, so the early returns below leak
       nothing. */
    p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
    if (p)
    {
        apif = vec_elt_at_index (apm->interfaces, p[0]);
        /* sw_if_index is optional, exactly as on the success path. */
        if (sw_if_index)
            *sw_if_index = apif->sw_if_index;
        return VNET_API_ERROR_IF_ALREADY_EXISTS;
    }

    /* The name is copied into the fixed-size ifr.ifr_name below; refuse a
       name that would overflow it. */
    if (vec_len (host_if_name) > sizeof (ifr.ifr_name))
    {
        vlib_log_debug (apm->log_class, "host interface name too long");
        return VNET_API_ERROR_INVALID_INTERFACE;
    }

    /* Private copy of the name; it is owned by the interface on success and
       released on the error path. */
    host_if_name_dup = vec_dup (host_if_name);

    vec_validate (rx_req, 0);
    rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
    rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
    rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
    rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;

    vec_validate (tx_req, 0);
    tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
    tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
    tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
    tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;

    /*
     * make sure host side of interface is 'UP' before binding AF_PACKET
     * socket on it.
     */
    if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
    {
        vlib_log_debug (apm->log_class, "Failed to create socket");
        ret = VNET_API_ERROR_SYSCALL_ERROR_1;
        goto error;
    }

    /* Start from an all-zero request so that no stale stack bytes follow
       the name. */
    clib_memset (&ifr, 0, sizeof (ifr));
    clib_memcpy (ifr.ifr_name, (const char *) host_if_name,
                 vec_len (host_if_name));
    /* Resolve the host interface index from its name. */
    if ((ret = ioctl (fd2, SIOCGIFINDEX, &ifr)) < 0)
    {
        vlib_log_debug (apm->log_class, "af_packet_create error: %d", ret);
        /* Leave through the common error path so that fd2 and the vectors
           allocated above are released. */
        ret = VNET_API_ERROR_INVALID_INTERFACE;
        goto error;
    }

    host_if_index = ifr.ifr_ifindex;
    /* Read the flags of the host interface. */
    if ((ret = ioctl (fd2, SIOCGIFFLAGS, &ifr)) < 0)
    {
        vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
        goto error;
    }
    /* Bring the host interface up when it is not up yet. */
    if (!(ifr.ifr_flags & IFF_UP))
    {
        ifr.ifr_flags |= IFF_UP;
        if ((ret = ioctl (fd2, SIOCSIFFLAGS, &ifr)) < 0)
        {
            vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
            goto error;
        }
    }

    if (fd2 > -1)
    {
        close (fd2);
        /* Mark as closed: later "goto error" paths must not close the same
           descriptor number a second time, it may have been re-used. */
        fd2 = -1;
    }
    /* Create the af_packet socket. */
    ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);

    if (ret != 0)
        goto error;

    ret = is_bridge (host_if_name);

    if (ret == 0)            /* is a bridge, ignore state */
        host_if_index = -1;

    /* So far everything looks good, let's create interface */
    pool_get (apm->interfaces, apif);
    if_index = apif - apm->interfaces;

    apif->host_if_index = host_if_index;
    apif->fd = fd;
    apif->rx_ring = ring;
    /* The tx ring starts right behind the rx blocks in the same mapping. */
    apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
    apif->rx_req = rx_req;
    apif->tx_req = tx_req;
    apif->host_if_name = host_if_name_dup;
    apif->per_interface_next_index = ~0;
    apif->next_tx_frame = 0;
    apif->next_rx_frame = 0;

    /* The lock is only initialized when there is more than one vlib main. */
    if (tm->n_vlib_mains > 1)
        clib_spinlock_init (&apif->lockp);

    /* Register the socket with file_main so that af_packet_fd_read_ready is
       called for it; per the original annotation this event is what
       emulates an rx interrupt. */
    {
        clib_file_t template = { 0 };
        template.read_function = af_packet_fd_read_ready;
        template.file_descriptor = fd;
        template.private_data = if_index;
        template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
        template.description = format (0, "%U", format_af_packet_device_name,
                                       if_index);
        apif->clib_file_index = clib_file_add (&file_main, &template);
    }

    /*use configured or generate random MAC address */
    if (hw_addr_set)
        clib_memcpy (hw_addr, hw_addr_set, 6);
    else
    {
        f64 now = vlib_time_now (vm);
        u32 rnd;
        rnd = (u32) (now * 1e6);
        rnd = random_u32 (&rnd);

        clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
        hw_addr[0] = 2;
        hw_addr[1] = 0xfe;
    }
    /* Register the Ethernet interface. */
    error = ethernet_register_interface (vnm, af_packet_device_class.index,
                                         if_index, hw_addr, &apif->hw_if_index,
                                         af_packet_eth_flag_change);

    if (error)
    {
        /* NOTE(review): fd, ring and the clib_file entry added above are not
           released on this path - confirm whether they need explicit
           cleanup. */
        clib_memset (apif, 0, sizeof (*apif));
        pool_put (apm->interfaces, apif);
        vlib_log_err (apm->log_class, "Unable to register interface: %U",
                      format_clib_error, error);
        clib_error_free (error);
        ret = VNET_API_ERROR_SYSCALL_ERROR_1;
        goto error;
    }

    sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
    hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
    apif->sw_if_index = sw->sw_if_index;
    /* Set the input node that serves this interface. */
    vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
                                      af_packet_input_node.index);
    /* Assign queue 0 of the interface to an rx thread. */
    vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0,    /* queue */
                                        ~0 /* any cpu */ );
    /* The interface supports interrupt mode. */
    hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
    vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
                                 VNET_HW_INTERFACE_FLAG_LINK_UP);

    vnet_hw_interface_set_rx_mode (vnm, apif->hw_if_index, 0,
                                   VNET_HW_INTERFACE_RX_MODE_INTERRUPT);

    mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
                   0);
    if (sw_if_index)
        *sw_if_index = apif->sw_if_index;

    return 0;

error:
    /* Common failure exit: close the helper socket if it is still open and
       release the vectors allocated by this function. */
    if (fd2 > -1)
        close (fd2);
    vec_free (host_if_name_dup);
    vec_free (rx_req);
    vec_free (tx_req);
    return ret;
}

注册ethernet接口

/*
 * Register an Ethernet interface.
 *
 * Takes an ethernet_interface_t from the pool, registers a hardware
 * interface of class ethernet_hw_interface_class for it, sets up the
 * interface's output node for Ethernet, fills in the packet size limits and
 * the default MTU and stores the link-layer address.
 *
 * The hardware interface index is written to *hw_if_index_return, which
 * must not be NULL. No step in this function reports a failure, so the
 * function always returns 0 (no error).
 */
clib_error_t *
ethernet_register_interface (vnet_main_t * vnm,
                             u32 dev_class_index,    /* device class index */
                             u32 dev_instance,    /* device instance index */
                             const u8 * address,    /* link-layer address */
                             u32 * hw_if_index_return,    /* out: hardware interface index */
                             ethernet_flag_change_function_t flag_change)
{
    ethernet_main_t *em = &ethernet_main;
    ethernet_interface_t *ei;
    vnet_hw_interface_t *hi;
    u32 hw_if_index;

    /* Take one Ethernet interface instance from the pool. */
    pool_get (em->interfaces, ei);
    /* Callback for configuring hardware features of the Ethernet device,
       e.g. promiscuous mode. */
    ei->flag_change = flag_change;
    /* Register the interface; this assigns the hardware interface index.
       The pool position of ei is passed as the hardware instance. */
    hw_if_index = vnet_register_interface
                  (vnm,
                   dev_class_index, dev_instance,
                   ethernet_hw_interface_class.index, ei - em->interfaces);
    *hw_if_index_return = hw_if_index;

    hi = vnet_get_hw_interface (vnm, hw_if_index);
    /* Install the Ethernet related functions on the output node. */
    ethernet_setup_node (vnm->vlib_main, hi->output_node_index);

    hi->min_packet_bytes = hi->min_supported_packet_bytes =
                               ETHERNET_MIN_PACKET_BYTES;
    hi->max_packet_bytes = hi->max_supported_packet_bytes =
                               ETHERNET_MAX_PACKET_BYTES;

    /* Default MTU used for Ethernet interfaces here is 9000 bytes (a jumbo
       frame size, not the classic 1500-byte Ethernet MTU). */
    vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);

    clib_memcpy (ei->address, address, sizeof (ei->address));
    vec_add (hi->hw_address, address, sizeof (ei->address));

    return 0;
}

向接口管理中注册一个接口

/* Register an interface instance. */
u32
vnet_register_interface (vnet_main_t * vnm,
                         u32 dev_class_index,
                         u32 dev_instance,/* 使用同一种驱动的设备实例编号 */
                         u32 hw_class_index, 
                         u32 hw_instance)/* 同一类硬件设备的编号,比如以太网设备 */
{
    vnet_interface_main_t *im = &vnm->interface_main;
    vnet_hw_interface_t *hw;
    /* 根据设备类型索引获取对应的类型描述控制块,相当于linux内核的驱动ops */
    vnet_device_class_t *dev_class =
    vnet_get_device_class (vnm, dev_class_index);
    vnet_hw_interface_class_t *hw_class =
    vnet_get_hw_interface_class (vnm, hw_class_index);
    vlib_main_t *vm = vnm->vlib_main;
    vnet_feature_config_main_t *fcm;
    vnet_config_main_t *cm;
    u32 hw_index, i;
    char *tx_node_name = NULL, *output_node_name = NULL;
    /* 分配一个硬件接口描述控制块 */
    pool_get (im->hw_interfaces, hw);
    clib_memset (hw, 0, sizeof (*hw));

    hw_index = hw - im->hw_interfaces;
    hw->hw_if_index = hw_index;
    /* 默认模式就是POLLING模式 */
    hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
    /* 设置设备名字 */
    if (dev_class->format_device_name)
        hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
    else if (hw_class->format_interface_name)
        hw->name = format (0, "%U", hw_class->format_interface_name,
                           dev_instance);
    else
        hw->name = format (0, "%s%x", hw_class->name, dev_instance);
    /* 构建接口名字到接口索引的hash表 */
    if (!im->hw_interface_by_name)
        im->hw_interface_by_name = hash_create_vec ( /* size */ 0,
                sizeof (hw->name[0]),
                sizeof (uword));
    
    /* 设置本接口名字与索引的映射关系 */
    hash_set_mem (im->hw_interface_by_name, hw->name, hw_index);

    /* Make hardware interface point to software interface. */
    /* 在硬件接口的基础上构建软件接口 */
    {
        vnet_sw_interface_t sw =
        {
            .type = VNET_SW_INTERFACE_TYPE_HARDWARE,
            .flood_class = VNET_FLOOD_CLASS_NORMAL,
            .hw_if_index = hw_index
        };
        /* 分配软件接口索引 */
        hw->sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, &sw);
    }
    /* 设置设备类索引 */
    hw->dev_class_index = dev_class_index;
    /* 同类设备的索引 */
    hw->dev_instance = dev_instance;
    /* 硬件类索引 */
    hw->hw_class_index = hw_class_index;
    /* 同硬件类索引 */
    hw->hw_instance = hw_instance;

    hw->max_rate_bits_per_sec = 0;
    hw->min_packet_bytes = 0;
    /* 设置mtu默认为0 */
    vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
    /* 这类设备没有发送函数,创建一个无发送类型节点 */
    if (dev_class->tx_function == 0)
        goto no_output_nodes;    /* No output/tx nodes to create */
    /* interface不仅需要承当报文输入功能,同时也要承担报文输出功能,所以需要两个node
     * 输入功能采用的是依附于input-node,而输出功能需要单独添加一个node,在vpp实现中
     * 采用了两个node来实现输出功能,一个是hw-interface层,用于构建链路层头,另外一个
     * 物理层,调用驱动发包函数发送报文。
     */
    tx_node_name = (char *) format (0, "%v-tx", hw->name);
    output_node_name = (char *) format (0, "%v-output", hw->name);

    /* If we have previously deleted interface nodes, re-use them. */
    /* 如果以前有些接口node删除了,复用它们 */
    if (vec_len (im->deleted_hw_interface_nodes) > 0)
    {
        vnet_hw_interface_nodes_t *hn;
        vlib_node_t *node;
        vlib_node_runtime_t *nrt;

        hn = vec_end (im->deleted_hw_interface_nodes) - 1;

        hw->tx_node_index = hn->tx_node_index;
        hw->output_node_index = hn->output_node_index;

        vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
        vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);

        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            vnet_interface_output_runtime_t *rt;

            rt = vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);
            ASSERT (rt->is_deleted == 1);
            rt->is_deleted = 0;
            rt->hw_if_index = hw_index;
            rt->sw_if_index = hw->sw_if_index;
            rt->dev_instance = hw->dev_instance;

            rt = vlib_node_get_runtime_data (this_vlib_main, hw->tx_node_index);
            rt->hw_if_index = hw_index;
            rt->sw_if_index = hw->sw_if_index;
            rt->dev_instance = hw->dev_instance;
        });
        /* *INDENT-ON* */

        /* The new class may differ from the old one.
         * Functions have to be updated. */
        node = vlib_get_node (vm, hw->output_node_index);
        node->function = vnet_interface_output_node;
        node->format_trace = format_vnet_interface_output_trace;
        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
            nrt->function = node->function;
        });
        /* *INDENT-ON* */

        node = vlib_get_node (vm, hw->tx_node_index);
        node->function = dev_class->tx_function;
        node->format_trace = dev_class->format_tx_trace;
        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
            nrt->function = node->function;
        });
        /* *INDENT-ON* */

        _vec_len (im->deleted_hw_interface_nodes) -= 1;
    }
    else
    {
        vlib_node_registration_t r;
        vnet_interface_output_runtime_t rt =
        {
            .hw_if_index = hw_index,
            .sw_if_index = hw->sw_if_index,
            .dev_instance = hw->dev_instance,
            .is_deleted = 0,
        };
        /* 注册物理层输出节点,该节点直接将报文输出,不需要下一个节点,除非报文出错 */
        clib_memset (&r, 0, sizeof (r));
        r.type = VLIB_NODE_TYPE_INTERNAL;
        r.runtime_data = &rt;
        r.runtime_data_bytes = sizeof (rt);
        r.scalar_size = 0;
        r.vector_size = sizeof (u32);

        r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
        r.name = tx_node_name;
        r.function = dev_class->tx_function;

        hw->tx_node_index = vlib_register_node (vm, &r);

        vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
                                            "error-drop",
                                            VNET_INTERFACE_TX_NEXT_DROP);
        /* 注册链路层输出节点,用于构建链路层信息,指向物理层节点 */
        r.flags = 0;
        r.name = output_node_name;
        r.function = vnet_interface_output_node;
        r.format_trace = format_vnet_interface_output_trace;

        {
            static char *e[] =
            {
                "interface is down",
                "interface is deleted",
                "no buffers to segment GSO",
            };

            r.n_errors = ARRAY_LEN (e);
            r.error_strings = e;
        }
        hw->output_node_index = vlib_register_node (vm, &r);

        vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
                                            "error-drop",
                                            VNET_INTERFACE_OUTPUT_NEXT_DROP);
        /* 指向物理层输出节点 */
        vlib_node_add_next_with_slot (vm, hw->output_node_index,
                                      hw->tx_node_index,
                                      VNET_INTERFACE_OUTPUT_NEXT_TX);

        /* add interface to the list of "output-interface" feature arc start nodes
         * and clone nexts from 1st interface if it exists 
         * 构建链路层输出节点的output-featrue-arc。
         * 将本output节点作为output-featrue-arc的一个起始几点。
         */
        fcm = vnet_feature_get_config_main (im->output_feature_arc_index);
        cm = &fcm->config_main;
        i = vec_len (cm->start_node_indices);
        vec_validate (cm->start_node_indices, i);
        cm->start_node_indices[i] = hw->output_node_index;
        if (hw_index)
        {
            /* copy nexts from 1st interface */
            vnet_hw_interface_t *first_hw;
            vlib_node_t *first_node;

            first_hw = vnet_get_hw_interface (vnm, /* hw_if_index */ 0);
            first_node = vlib_get_node (vm, first_hw->output_node_index);

            /* 1st 2 nexts are already added above */
            for (i = 2; i < vec_len (first_node->next_nodes); i++)
                        vlib_node_add_next_with_slot (vm, hw->output_node_index,
                                                      first_node->next_nodes[i], i);
        }
    }
    /* 构建链路层输出节点 */
    setup_output_node (vm, hw->output_node_index, hw_class);
    /* 构建物理层输出节点 */
    setup_tx_node (vm, hw->tx_node_index, dev_class);

no_output_nodes:
    /* Call all up/down callbacks with zero flags when interface is created. */
    vnet_sw_interface_set_flags_helper (vnm, hw->sw_if_index, /* flags */ 0,
                                        VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
    vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
                                        VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
    vec_free (tx_node_name);
    vec_free (output_node_name);

    return hw_index;
}

给接口分配指定的收包线程

在函数 af_packet_create_if 中调用了如下代码,先设置该接口对应的输入节点,再为该接口的 0 号队列分配收包线程。对于 af_packet,采用的是中断模式收包。

    /* Record which input node services this interface. */
    vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
                                      af_packet_input_node.index);
    /* Assign queue 0 of this interface to a receive thread; ~0 is not a
     * valid worker index, so the callee picks the thread itself. */
    vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0,    /* queue */
                                        ~0 /* any cpu */ );
vnet_hw_interface_assign_rx_thread
/*
 * Bind one RX queue of a hardware interface to a receive thread.
 *
 * vnm          - vnet main, used to look up the hardware interface.
 * hw_if_index  - hardware interface that owns the queue.
 * queue_id     - RX queue being assigned.
 * thread_index - requested thread. It is forced to 0 when
 *                first_worker_thread_index is 0; any other non-zero value
 *                outside [first_worker_thread_index,
 *                last_worker_thread_index] is replaced by the next worker in
 *                round-robin order.
 *
 * The queue is appended, in polling mode, to the runtime data of the
 * interface's input node on the chosen thread, and that node is put into
 * the polling state.
 */
void
vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
                                    u16 queue_id, uword thread_index)
{
    vnet_device_main_t *dev_main = &vnet_device_main;
    vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
    vnet_device_input_runtime_t *input_rt;
    vnet_device_and_queue_t *entry;
    vlib_main_t *rx_vm, *caller_vm;

    ASSERT (hw_if->input_node_index > 0);

    /* A first worker index of 0 means no dedicated workers: use thread 0. */
    if (dev_main->first_worker_thread_index == 0)
    {
        thread_index = 0;
    }

    if (thread_index != 0)
    {
        int in_worker_range =
            thread_index >= dev_main->first_worker_thread_index &&
            thread_index <= dev_main->last_worker_thread_index;

        if (!in_worker_range)
        {
            /* Requested thread is not a worker: hand out workers round-robin. */
            thread_index = dev_main->next_worker_thread_index++;
            if (dev_main->next_worker_thread_index >
                    dev_main->last_worker_thread_index)
            {
                /* Wrap around for the next assignment. */
                dev_main->next_worker_thread_index =
                    dev_main->first_worker_thread_index;
            }
        }
    }

    rx_vm = vlib_mains[thread_index];   /* thread that will receive */
    caller_vm = vlib_get_main ();       /* thread running this call */

    /* Runtime data of another thread is edited below; hold the barrier. */
    vlib_worker_thread_barrier_sync (caller_vm);

    input_rt = vlib_node_get_runtime_data (rx_vm, hw_if->input_node_index);

    /* Append a (device, queue) entry that starts out in polling mode. */
    vec_add2 (input_rt->devices_and_queues, entry, 1);
    entry->hw_if_index = hw_if_index;
    entry->dev_instance = hw_if->dev_instance;
    entry->queue_id = queue_id;
    entry->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
    input_rt->enabled_node_state = VLIB_NODE_STATE_POLLING;

    vnet_device_queue_update (vnm, input_rt);

    /* Remember, per queue, the owning thread and the RX mode. */
    vec_validate (hw_if->input_node_thread_index_by_queue, queue_id);
    hw_if->input_node_thread_index_by_queue[queue_id] = thread_index;
    vec_validate (hw_if->rx_mode_by_queue, queue_id);
    hw_if->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING;

    vlib_worker_thread_barrier_release (caller_vm);

    /* Apply the node state chosen above on the receiving thread. */
    vlib_node_set_state (rx_vm, hw_if->input_node_index,
                         input_rt->enabled_node_state);
}

删除一个interface

我们还是以af_packet接口为例:

/*
 * Delete the af_packet interface registered under host_if_name.
 *
 * vm           - unused here.
 * host_if_name - key into apm->if_index_by_host_if_name.
 *
 * Returns 0 on success, or VNET_API_ERROR_SYSCALL_ERROR_1 (after logging a
 * warning) when no interface is registered under that name.
 */
int
af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
{
    af_packet_main_t *apm = &af_packet_main;
    vnet_main_t *vnm = vnet_get_main ();
    af_packet_if_t *intf;
    uword *slot;
    uword pool_index;
    u32 mapped_bytes;

    slot = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
    if (slot == NULL)
    {
        vlib_log_warn (apm->log_class, "Host interface %s does not exist",
                       host_if_name);
        return VNET_API_ERROR_SYSCALL_ERROR_1;
    }

    intf = pool_elt_at_index (apm->interfaces, slot[0]);
    pool_index = intf - apm->interfaces;

    /* Clear the hardware flags (bring the interface down) ... */
    vnet_hw_interface_set_flags (vnm, intf->hw_if_index, 0);
    /* ... and detach queue 0 from its input node / receive thread. */
    vnet_hw_interface_unassign_rx_thread (vnm, intf->hw_if_index, 0);

    /* Drop the registered file entry if there is one; otherwise close the
     * descriptor directly. */
    if (intf->clib_file_index == ~0)
    {
        close (intf->fd);
    }
    else
    {
        clib_file_del (&file_main, file_main.file_pool + intf->clib_file_index);
        intf->clib_file_index = ~0;
    }

    /* The RX and TX rings are unmapped with one call starting at rx_ring, so
     * the length is the sum of both ring sizes. */
    mapped_bytes = intf->rx_req->tp_block_size * intf->rx_req->tp_block_nr +
                   intf->tx_req->tp_block_size * intf->tx_req->tp_block_nr;
    if (munmap (intf->rx_ring, mapped_bytes) != 0)
    {
        vlib_log_warn (apm->log_class,
                       "Host interface %s could not free rx/tx ring",
                       host_if_name);
    }
    intf->rx_ring = NULL;
    intf->tx_ring = NULL;
    intf->fd = -1;

    /* Release the ring request descriptors. */
    vec_free (intf->rx_req);
    intf->rx_req = NULL;
    vec_free (intf->tx_req);
    intf->tx_req = NULL;

    /* Release the stored host interface name. */
    vec_free (intf->host_if_name);
    intf->host_if_name = NULL;
    intf->host_if_index = -1;

    mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &pool_index);

    /* Remove the ethernet interface that was built on top of this device. */
    ethernet_delete_interface (vnm, intf->hw_if_index);

    pool_put (apm->interfaces, intf);

    return 0;
}

解除收包线程

/*
 * Detach RX queue queue_id of hardware interface hw_if_index from the
 * thread recorded for it in input_node_thread_index_by_queue.
 *
 * Returns 0 on success. Returns VNET_API_ERROR_INVALID_INTERFACE when the
 * interface has no per-queue thread table, when queue_id is beyond that
 * table, or when the input node's runtime data on the recorded thread holds
 * no entry for this (interface, queue) pair.
 */
int
vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
                                      u16 queue_id)
{
    vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
    vnet_device_input_runtime_t *input_rt;
    vnet_device_and_queue_t *entry;
    vnet_device_and_queue_t *victim = 0;
    vlib_main_t *rx_vm, *caller_vm;
    vnet_hw_interface_rx_mode removed_mode;
    uword owner_thread;
    uword victim_index;

    if (hw_if->input_node_thread_index_by_queue == 0)
        return VNET_API_ERROR_INVALID_INTERFACE;

    if (vec_len (hw_if->input_node_thread_index_by_queue) < queue_id + 1)
        return VNET_API_ERROR_INVALID_INTERFACE;

    owner_thread = hw_if->input_node_thread_index_by_queue[queue_id];
    rx_vm = vlib_mains[owner_thread];

    /* Runtime data of the input node on the thread that owns the queue. */
    input_rt = vlib_node_get_runtime_data (rx_vm, hw_if->input_node_index);

    /* Locate the entry describing this (interface, queue) pair. */
    vec_foreach (entry, input_rt->devices_and_queues)
    {
        if (entry->hw_if_index == hw_if_index && entry->queue_id == queue_id)
        {
            victim = entry;
            break;
        }
    }

    if (victim == 0)
        return VNET_API_ERROR_INVALID_INTERFACE;

    removed_mode = victim->mode;
    victim_index = victim - input_rt->devices_and_queues;

    /* Remove the entry while holding the worker barrier. */
    caller_vm = vlib_get_main ();
    vlib_worker_thread_barrier_sync (caller_vm);
    vec_del1 (input_rt->devices_and_queues, victim_index);
    vnet_device_queue_update (vnm, input_rt);
    hw_if->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN;
    vlib_worker_thread_barrier_release (caller_vm);

    /* Nothing left for this input node on that thread: disable the node. */
    if (vec_len (input_rt->devices_and_queues) == 0)
    {
        vlib_node_set_state (rx_vm, hw_if->input_node_index,
                             VLIB_NODE_STATE_DISABLED);
        return 0;
    }

    if (removed_mode != VNET_HW_INTERFACE_RX_MODE_POLLING)
        return 0;

    /*
     * if the deleted interface is polling, we may need to set the node state
     * to interrupt if there is no more polling interface for this device's
     * corresponding thread. This is because mixed interfaces
     * (polling and interrupt), assigned to the same thread, set the
     * thread to polling prior to the deletion.
     */
    vec_foreach (entry, input_rt->devices_and_queues)
    {
        if (entry->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
            return 0;       /* another polling queue remains: keep polling */
    }

    input_rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
    vlib_node_set_state (rx_vm, hw_if->input_node_index,
                         input_rt->enabled_node_state);

    return 0;
}

ethernet_delete_interface

void
ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index)
{
    ethernet_main_t *em = &ethernet_main;
    ethernet_interface_t *ei;
    vnet_hw_interface_t *hi;
    main_intf_t *main_intf;
    vlan_table_t *vlan_table;
    u32 idx;

    hi = vnet_get_hw_interface (vnm, hw_if_index);
    ei = pool_elt_at_index (em->interfaces, hi->hw_instance);

    /* Delete vlan mapping table for dot1q and dot1ad. */
    main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
    if (main_intf->dot1q_vlans)
    {
        vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
        for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
            if (vlan_table->vlans[idx].qinqs)
            {
                pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
                vlan_table->vlans[idx].qinqs = 0;
            }
        }
        pool_put_index (em->vlan_pool, main_intf->dot1q_vlans);
        main_intf->dot1q_vlans = 0;
    }
    if (main_intf->dot1ad_vlans)
    {
        vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
        for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
            if (vlan_table->vlans[idx].qinqs)
            {
                pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
                vlan_table->vlans[idx].qinqs = 0;
            }
        }
        pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans);
        main_intf->dot1ad_vlans = 0;
    }

    vnet_delete_hw_interface (vnm, hw_if_index);
    pool_put (em->interfaces, ei);
}

删除硬件接口信息

/*
 * Delete hardware interface hw_if_index and everything hanging off it.
 *
 * Steps, in order: clear the hardware flags if any are set, run the
 * hardware-interface delete callbacks, delete all sub-interfaces, delete the
 * software interface paired with the hardware interface, park the output/tx
 * nodes for reuse (only when the device class has a tx function), then free
 * the per-interface vectors and return the element to im->hw_interfaces.
 */
void
vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
{
    vnet_interface_main_t *im = &vnm->interface_main;
    vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
    vlib_main_t *vm = vnm->vlib_main;
    vnet_device_class_t *dev_class = vnet_get_device_class (vnm,
            hw->dev_class_index);
    /* If it is up, mark it down. */
    if (hw->flags != 0)
        vnet_hw_interface_set_flags (vnm, hw_if_index, /* flags */ 0);

    /* Call delete callbacks. */
    call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0);

    /* Delete any sub-interfaces. */
    {
        u32 id, sw_if_index;
        /* *INDENT-OFF* */
        hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
                      (
        {
            vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
            /* Key layout: parent sw_if_index in the high 32 bits, sub id in
             * the low 32 bits. */
            u64 sup_and_sub_key =
            ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
            hash_unset_mem_free (&im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
            vnet_delete_sw_interface (vnm, sw_if_index);
        }));
        hash_free (hw->sub_interface_sw_if_index_by_id);
        /* *INDENT-ON* */
    }

    /* Delete software interface corresponding to hardware interface. */
    vnet_delete_sw_interface (vnm, hw->sw_if_index);

    if (dev_class->tx_function)
    {
        /* Put output/tx nodes into recycle pool */
        vnet_hw_interface_nodes_t *dn;

        /* On every thread, flag the output node's runtime data as deleted
         * rather than removing the node itself. */
        /* *INDENT-OFF* */
        foreach_vlib_main
        (
        {
            vnet_interface_output_runtime_t *rt =
            vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);

            /* Mark node runtime as deleted so output node (if called)
             * will drop packets. */
            rt->is_deleted = 1;
        });
        /* *INDENT-ON* */
        /* Rename both nodes so their names no longer refer to this interface. */
        vlib_node_rename (vm, hw->output_node_index,
                          "interface-%d-output-deleted", hw_if_index);
        vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
                          hw_if_index);
        /* Park the node pair on deleted_hw_interface_nodes; interface
         * registration takes nodes from that list before creating new ones. */
        vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
        dn->tx_node_index = hw->tx_node_index;
        dn->output_node_index = hw->output_node_index;
    }

    hash_unset_mem (im->hw_interface_by_name, hw->name);
    vec_free (hw->name);
    vec_free (hw->hw_address);
    vec_free (hw->input_node_thread_index_by_queue);
    /* rx_mode_by_queue is grown next to input_node_thread_index_by_queue in
     * vnet_hw_interface_assign_rx_thread; free it here as well, otherwise it
     * is leaked when the pool element is zeroed on reuse. */
    vec_free (hw->rx_mode_by_queue);
    vec_free (hw->dq_runtime_index_by_queue);

    pool_put (im->hw_interfaces, hw);
}

你可能感兴趣的

小妖怪 · 6月5日

小哥哥写的好好

回复

欧阳茜 · 6月27日

赞👍

回复

载入中...