IP 分片和重组过程

  1. IP 分片
    当一个 IP 数据包的大小超过了网络的 MTU(最大传输单元)时,数据包需要被拆分成多个较小的片段进行传输。每个片段都包含一部分原始数据包的内容,并且带有标识信息,用于在接收端进行重组。
原始 IP 数据包被拆分后的各个分片(每个分片都带有自己的 IP 头部)
+----------------------------------------------------------+
| IP Header | Data (Part 1)                                 |
+----------------------------------------------------------+
| IP Header | Data (Part 2)                                 |
+----------------------------------------------------------+
| IP Header | Data (Part 3)                                 |
+----------------------------------------------------------+
  2. IP 重组
    在接收端,分片的数据包需要被重新组合成原始的完整数据包。这是通过使用每个片段的标识信息和偏移量来实现的。
接收到的分片
+---------------------------+  +---------------------------+  +---------------------------+
| IP Header | Data (Part 1) |  | IP Header | Data (Part 2) |  | IP Header | Data (Part 3) |
+---------------------------+  +---------------------------+  +---------------------------+

重组后的数据包
+----------------------------------------------------------+
| IP Header | Data (Part 1) + Data (Part 2) + Data (Part 3) |
+----------------------------------------------------------+

初始化和配置

#include <stdio.h>
#include <signal.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_ip_frag.h>
#include <rte_ip.h>

/* Number of mbufs requested when the packet mbuf pool is created in main(). */
#define NUM_MBUFS 8191
/* Cache-size argument passed when the mbuf pool is created in main(). */
#define MBUF_CACHE_SIZE 250
/* NOTE(review): not referenced anywhere in the visible code. */
#define BURST_SIZE 32
/* Capacity of the fragment array declared by the test function. */
#define MAX_FRAG_NUM 4
/* MTU, in bytes, that the test function hands to fragment_packet(). */
#define MTU 1500

/* Set to true by signal_handler() on SIGINT/SIGTERM; reset to false in main(). */
static volatile bool force_quit;
/* Packet mbuf pool; created in main() and used for every mbuf allocation. */
struct rte_mempool *mbuf_pool;

/*
 * Signal handler for SIGINT and SIGTERM.
 *
 * Prints a notice containing the signal number and raises the global
 * force_quit flag. Any other signal number is ignored.
 */
static void signal_handler(int signum) {
    if (signum != SIGINT && signum != SIGTERM)
        return;

    printf("\nSignal %d received, preparing to exit...\n", signum);
    force_quit = true;
}

/*
 * Configure and start an Ethernet port with a single RX queue and a single
 * TX queue, using a zero-initialised port configuration.
 *
 * The RX queue draws its buffers from the global mbuf_pool.
 *
 * Returns 0 on success, or the negative value reported by the first DPDK
 * call that failed.
 */
static int configure_eth_device(uint16_t port_id) {
    enum { RX_QUEUES = 1, TX_QUEUES = 1, QUEUE_ID = 0, RING_DESCS = 128 };
    struct rte_eth_conf port_conf = {0};
    int rc;

    /* Each step only runs if every previous step succeeded. */
    rc = rte_eth_dev_configure(port_id, RX_QUEUES, TX_QUEUES, &port_conf);

    if (rc >= 0)
        rc = rte_eth_rx_queue_setup(port_id, QUEUE_ID, RING_DESCS,
                                    rte_eth_dev_socket_id(port_id), NULL, mbuf_pool);

    if (rc >= 0)
        rc = rte_eth_tx_queue_setup(port_id, QUEUE_ID, RING_DESCS,
                                    rte_eth_dev_socket_id(port_id), NULL);

    if (rc >= 0)
        rc = rte_eth_dev_start(port_id);

    return rc < 0 ? rc : 0;
}

分片函数

static int fragment_packet(struct rte_mbuf *pkt, struct rte_mbuf **frags, uint16_t mtu) {
    struct rte_ipv4_hdr *ip_hdr;
    uint16_t frag_size, frag_offset, frag_num;
    struct rte_mbuf *frag;
    uint16_t ip_hdr_size = sizeof(struct rte_ipv4_hdr);
    uint16_t eth_hdr_size = sizeof(struct rte_ether_hdr);
    uint16_t pkt_data_size = rte_pktmbuf_data_len(pkt) - eth_hdr_size - ip_hdr_size;

    ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *, eth_hdr_size);

    // 每个片段的大小应不超过 MTU 减去 IP 头部的大小,并且是8的倍数
    frag_size = (mtu - ip_hdr_size) & ~7; // 确保是8的倍数
    frag_offset = 0;
    frag_num = 0;

    while (frag_offset < pkt_data_size) {
        frag = rte_pktmbuf_alloc(mbuf_pool);
        if (!frag)
            return -1;

        // 片段大小
        uint16_t copy_size = RTE_MIN(frag_size, pkt_data_size - frag_offset);

        // 复制以太网头部
        rte_memcpy(rte_pktmbuf_append(frag, eth_hdr_size), rte_pktmbuf_mtod(pkt, void *), eth_hdr_size);

        // 复制IP头部
        struct rte_ipv4_hdr *frag_ip_hdr = rte_pktmbuf_mtod_offset(frag, struct rte_ipv4_hdr *, eth_hdr_size);
        rte_memcpy(frag_ip_hdr, ip_hdr, ip_hdr_size);

        // 设置片段的偏移量和标志
        frag_ip_hdr->fragment_offset = rte_cpu_to_be_16((frag_offset >> 3) | (copy_size < frag_size ? 0 : IP_MF));
        frag_ip_hdr->total_length = rte_cpu_to_be_16(ip_hdr_size + copy_size);

        // 复制数据
        rte_memcpy(rte_pktmbuf_append(frag, copy_size), rte_pktmbuf_mtod_offset(pkt, void *, eth_hdr_size + ip_hdr_size + frag_offset), copy_size);

        frags[frag_num++] = frag;
        frag_offset += copy_size;
    }

    return frag_num;
}

重组函数

static struct rte_mbuf *reassemble_packet(struct rte_mbuf **frags, uint16_t frag_num) {
    struct rte_ip_frag_tbl *tbl;
    struct rte_ip_frag_death_row death_row;
    struct rte_mbuf *reassembled_pkt = NULL;

    // 创建一个IP分片表,用于存储和管理分片信息
    tbl = rte_ip_frag_table_create(1024, MAX_FRAG_NUM, 0, 0, rte_socket_id());
    if (!tbl)
        return NULL;

    // 初始化死亡行,用于存储过期的分片
    rte_ip_frag_death_row_init(&death_row, 0);

    // 逐个处理分片,将其添加到分片表中
    for (int i = 0; i < frag_num; i++) {
        uint64_t tms = rte_rdtsc();

        // 将分片添加到分片表中,并尝试重组数据包
        reassembled_pkt = rte_ipv4_frag_reassemble_packet(tbl, &death_row, frags[i], tms, rte_socket_id());
        if (reassembled_pkt != NULL) {
            // 成功重组数据包
            break;
        }
    }

    // 处理死亡行中的过期分片
    rte_ip_frag_free_death_row(&death_row, 0);

    // 销毁分片表
    rte_ip_frag_table_destroy(tbl);

    return reassembled_pkt;
}

测试函数:模拟发送大于 MTU 的数据包,进行分片和重组,并验证结果。

static void test_fragmentation_reassembly() {
    struct rte_mbuf *pkt;
    struct rte_mbuf *frags[MAX_FRAG_NUM];
    struct rte_mbuf *reassembled_pkt;
    uint16_t frag_num;

    // 创建一个大于MTU的数据包
    pkt = rte_pktmbuf_alloc(mbuf_pool);
    if (!pkt) {
        printf("Failed to allocate packet\n");
        return;
    }

    // 填充数据包
    rte_pktmbuf_append(pkt, 2000); // 大于MTU

    // 分片
    frag_num = fragment_packet(pkt, frags, MTU);
    if (frag_num <= 0) {
        printf("Failed to fragment packet\n");
        rte_pktmbuf_free(pkt);
        return;
    }

    printf("Packet fragmented into %d fragments\n", frag_num);

    // 重组
    reassembled_pkt = reassemble_packet(frags, frag_num);
    if (reassembled_pkt) {
        printf("Packet reassembled successfully\n");
        rte_pktmbuf_free(reassembled_pkt);
    } else {
        printf("Packet reassembly failed\n");
    }

    // 释放分片
    for (int i = 0; i < frag_num; i++) {
        rte_pktmbuf_free(frags[i]);
    }

    rte_pktmbuf_free(pkt);
}

主函数

/*
 * Program entry point: initialise the DPDK EAL, create the mbuf pool,
 * bring up port 0, run the fragmentation/reassembly self test, then tear
 * everything down again.
 *
 * Exits through rte_exit() if EAL initialisation, pool creation or port
 * configuration fails; otherwise returns 0.
 */
int main(int argc, char **argv) {
    /* Initialise the DPDK environment. */
    int ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

    force_quit = false;
    /* Register the signal handlers. */
    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    /* Create the memory pool that backs every packet buffer. */
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, MBUF_CACHE_SIZE, 0,
                                        RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    uint16_t port_id = 0;
    ret = configure_eth_device(port_id);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, port_id);

    /* Exercise fragmentation and reassembly. */
    test_fragmentation_reassembly();

    /* Stop and close the Ethernet device. */
    rte_eth_dev_stop(port_id);
    rte_eth_dev_close(port_id);
    /* Release the memory pool. */
    rte_mempool_free(mbuf_pool);
    mbuf_pool = NULL;

    /* Release the resources acquired by rte_eal_init(). */
    rte_eal_cleanup();

    return 0;
}

putao
8 声望1 粉丝

推动世界向前发展,改善民生。