IP 分片和重组过程
- IP 分片
当一个 IP 数据包的大小超过了网络的 MTU(最大传输单元)时,数据包需要被拆分成多个较小的片段进行传输。每个片段都包含一部分原始数据包的内容,并且带有标识信息(标识符、片偏移和 MF 标志),用于在接收端进行重组。
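原始 IP 数据包
+----------------------------------------------------------+
| IP Header | Data (Part 1) + Data (Part 2) + Data (Part 3) |
+----------------------------------------------------------+
分片后的数据包(每个分片都带有自己的 IP 头部)
+----------------------------------------------------------+
| IP Header | Data (Part 1) |
+----------------------------------------------------------+
| IP Header | Data (Part 2) |
+----------------------------------------------------------+
| IP Header | Data (Part 3) |
+----------------------------------------------------------+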
- IP 重组
在接收端,分片的数据包需要被重新组合成原始的完整数据包。这是通过使用每个片段的标识信息和偏移量来实现的。
接收到的分片
+----------------+ +----------------+ +----------------+
| IP Header | Data (Part 1) | | IP Header | Data (Part 2) | | IP Header | Data (Part 3) |
+----------------+ +----------------+ +----------------+
重组后的数据包
+----------------------------------------------------------+
| IP Header | Data (Part 1) + Data (Part 2) + Data (Part 3) |
+----------------------------------------------------------+
初始化和配置
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_mbuf.h>
/* Number of mbufs requested when main() creates the packet pool. */
#define NUM_MBUFS 8191
/* Cache-size argument passed to rte_pktmbuf_pool_create() in main(). */
#define MBUF_CACHE_SIZE 250
/* NOTE(review): not referenced anywhere in the code shown in this file. */
#define BURST_SIZE 32
/* Capacity of the fragment array used by the test; also passed as the
 * bucket_entries argument when the reassembly table is created. */
#define MAX_FRAG_NUM 4
/* MTU value the test passes to fragment_packet(). */
#define MTU 1500
/* Set to true by signal_handler() on SIGINT/SIGTERM; reset to false in main(). */
static volatile bool force_quit;
/* Packet buffer pool: created in main(), used for every mbuf allocation and
 * for the RX queue set up in configure_eth_device(). */
struct rte_mempool *mbuf_pool;
/*
 * Signal handler installed for SIGINT and SIGTERM.
 * Prints which signal arrived and raises force_quit; any other signal
 * number is ignored.
 */
static void signal_handler(int signum) {
    switch (signum) {
    case SIGINT:
    case SIGTERM:
        printf("\nSignal %d received, preparing to exit...\n", signum);
        force_quit = true;
        break;
    default:
        break;
    }
}
/*
 * Bring up one Ethernet port with a single RX queue and a single TX queue.
 *
 * Steps, in order: configure the device with a zeroed rte_eth_conf, set up
 * RX queue 0 (128 descriptors, buffers from mbuf_pool), set up TX queue 0
 * (128 descriptors), then start the port. The first failing step stops the
 * sequence.
 *
 * @param port_id  Port to configure.
 * @return 0 on success, otherwise the negative value returned by the step
 *         that failed.
 */
static int configure_eth_device(uint16_t port_id) {
    const uint16_t queue_id = 0;
    const uint16_t nb_desc = 128;
    struct rte_eth_conf port_conf = {0};

    int rc = rte_eth_dev_configure(port_id, 1, 1, &port_conf);

    if (rc >= 0) {
        rc = rte_eth_rx_queue_setup(port_id, queue_id, nb_desc,
                                    rte_eth_dev_socket_id(port_id), NULL, mbuf_pool);
    }
    if (rc >= 0) {
        rc = rte_eth_tx_queue_setup(port_id, queue_id, nb_desc,
                                    rte_eth_dev_socket_id(port_id), NULL);
    }
    if (rc >= 0) {
        rc = rte_eth_dev_start(port_id);
    }

    return (rc < 0) ? rc : 0;
}
分片函数
static int fragment_packet(struct rte_mbuf *pkt, struct rte_mbuf **frags, uint16_t mtu) {
struct rte_ipv4_hdr *ip_hdr;
uint16_t frag_size, frag_offset, frag_num;
struct rte_mbuf *frag;
uint16_t ip_hdr_size = sizeof(struct rte_ipv4_hdr);
uint16_t eth_hdr_size = sizeof(struct rte_ether_hdr);
uint16_t pkt_data_size = rte_pktmbuf_data_len(pkt) - eth_hdr_size - ip_hdr_size;
ip_hdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *, eth_hdr_size);
// 每个片段的大小应不超过 MTU 减去 IP 头部的大小,并且是8的倍数
frag_size = (mtu - ip_hdr_size) & ~7; // 确保是8的倍数
frag_offset = 0;
frag_num = 0;
while (frag_offset < pkt_data_size) {
frag = rte_pktmbuf_alloc(mbuf_pool);
if (!frag)
return -1;
// 片段大小
uint16_t copy_size = RTE_MIN(frag_size, pkt_data_size - frag_offset);
// 复制以太网头部
rte_memcpy(rte_pktmbuf_append(frag, eth_hdr_size), rte_pktmbuf_mtod(pkt, void *), eth_hdr_size);
// 复制IP头部
struct rte_ipv4_hdr *frag_ip_hdr = rte_pktmbuf_mtod_offset(frag, struct rte_ipv4_hdr *, eth_hdr_size);
rte_memcpy(frag_ip_hdr, ip_hdr, ip_hdr_size);
// 设置片段的偏移量和标志
frag_ip_hdr->fragment_offset = rte_cpu_to_be_16((frag_offset >> 3) | (copy_size < frag_size ? 0 : IP_MF));
frag_ip_hdr->total_length = rte_cpu_to_be_16(ip_hdr_size + copy_size);
// 复制数据
rte_memcpy(rte_pktmbuf_append(frag, copy_size), rte_pktmbuf_mtod_offset(pkt, void *, eth_hdr_size + ip_hdr_size + frag_offset), copy_size);
frags[frag_num++] = frag;
frag_offset += copy_size;
}
return frag_num;
}
重组函数
static struct rte_mbuf *reassemble_packet(struct rte_mbuf **frags, uint16_t frag_num) {
struct rte_ip_frag_tbl *tbl;
struct rte_ip_frag_death_row death_row;
struct rte_mbuf *reassembled_pkt = NULL;
// 创建一个IP分片表,用于存储和管理分片信息
tbl = rte_ip_frag_table_create(1024, MAX_FRAG_NUM, 0, 0, rte_socket_id());
if (!tbl)
return NULL;
// 初始化死亡行,用于存储过期的分片
rte_ip_frag_death_row_init(&death_row, 0);
// 逐个处理分片,将其添加到分片表中
for (int i = 0; i < frag_num; i++) {
uint64_t tms = rte_rdtsc();
// 将分片添加到分片表中,并尝试重组数据包
reassembled_pkt = rte_ipv4_frag_reassemble_packet(tbl, &death_row, frags[i], tms, rte_socket_id());
if (reassembled_pkt != NULL) {
// 成功重组数据包
break;
}
}
// 处理死亡行中的过期分片
rte_ip_frag_free_death_row(&death_row, 0);
// 销毁分片表
rte_ip_frag_table_destroy(tbl);
return reassembled_pkt;
}
测试函数:模拟发送大于 MTU 的数据包,进行分片和重组,并验证结果。
static void test_fragmentation_reassembly() {
struct rte_mbuf *pkt;
struct rte_mbuf *frags[MAX_FRAG_NUM];
struct rte_mbuf *reassembled_pkt;
uint16_t frag_num;
// 创建一个大于MTU的数据包
pkt = rte_pktmbuf_alloc(mbuf_pool);
if (!pkt) {
printf("Failed to allocate packet\n");
return;
}
// 填充数据包
rte_pktmbuf_append(pkt, 2000); // 大于MTU
// 分片
frag_num = fragment_packet(pkt, frags, MTU);
if (frag_num <= 0) {
printf("Failed to fragment packet\n");
rte_pktmbuf_free(pkt);
return;
}
printf("Packet fragmented into %d fragments\n", frag_num);
// 重组
reassembled_pkt = reassemble_packet(frags, frag_num);
if (reassembled_pkt) {
printf("Packet reassembled successfully\n");
rte_pktmbuf_free(reassembled_pkt);
} else {
printf("Packet reassembly failed\n");
}
// 释放分片
for (int i = 0; i < frag_num; i++) {
rte_pktmbuf_free(frags[i]);
}
rte_pktmbuf_free(pkt);
}
主函数
/**
 * Program entry point.
 *
 * Initializes the EAL, installs SIGINT/SIGTERM handlers, creates the mbuf
 * pool, configures port 0, runs the fragmentation/reassembly self-test once,
 * then tears everything down in reverse order.
 *
 * @return 0 on normal completion; initialization failures terminate the
 *         process through rte_exit().
 */
int main(int argc, char **argv) {
    /* Initialize the DPDK environment. */
    int ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");

    force_quit = false;

    /* Register the signal handlers. */
    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    /* Create the memory pool that backs every packet buffer. */
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS, MBUF_CACHE_SIZE, 0,
                                        RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    uint16_t port_id = 0;
    ret = configure_eth_device(port_id);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n", ret, port_id);

    /* Exercise fragmentation and reassembly. */
    test_fragmentation_reassembly();

    /* Stop and close the Ethernet device. */
    rte_eth_dev_stop(port_id);
    rte_eth_dev_close(port_id);

    /* Release the memory pool. */
    rte_mempool_free(mbuf_pool);

    /* Release the resources acquired by rte_eal_init(). */
    rte_eal_cleanup();

    return 0;
}
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。