1.首包在prerouting或者在output节点上,在函数resolve_normal_ct中会创建连接跟踪。如果存在期望连接的话,则关联其对应的master连接,并且设置新创建的连接跟踪的状态为IP_CT_RELATED,如果没有对应的期望连接,则设置其状态为IP_CT_NEW。正常情况下,首包只有这两种状态。
2.对于第一个应答包和后续应答包也是在prerouting或者在output节点上,一般在函数resolve_normal_ct中可以找到对应的处于IP_CT_RELATED或者IP_CT_NEW状态的连接跟踪,找到后设置其状态为IP_CT_ESTABLISHED_REPLY
3.对于非首个请求报文,在prerouting或者在output节点上,函数resolve_normal_ct中可以找到对应的连接跟踪,一般会将其设置为IP_CT_ESTABLISHED。
4.对于ICMP错误报文(比如源抑制,ttl超时,不可达报文)到达netfilter后,会根据ICMP携带的原始报文查找其所属的CT,如果该ICMP差错报文是CT的请求方向报文产生的,那么设置其状态为IP_CT_RELATED,如果是应答方向的报文产生的则设置为IP_CT_RELATED_REPLY。
5.很重要的一点,对于子连接的首包,会在函数init_conntrack中创建连接跟踪,并查找到其对应的期望连接(先创建连接跟踪,然后查找期望连接进行关联),在离开init_conntrack函数之前执行对应的expectfn函数。
我们以典型的ftp的期望连接为例,分析expectfn的作用。
假设没有nat
主动模式
客户端发送PORT xxx,xxx,xxx,xxx,ppp,ppp给服务器,连接跟踪通过help结构捕获了该消息,然后生成了请求方向的期望连接跟踪(假设从PORT命令中拿到的ip地址为dataip,端口为dataport)。假设主链接的应答方向ip为rip
dip/mask = dataip/0xffffffff
dport/mask = dataport/0xffff
sip/mask = rip/0xffffffff(由于源IP可以由服务器端重新指定,所以这里直接设置母连接服务器的IP地址也是不准确的,但是大多数正常情况是这样的)
sport/mask = 0/0
protocol = tcp
// 可以从函数nf_ct_expect_init的调用处分析得出。
/* 初始化期望连接,使用反方向的源地址和目的地址作为源目的地址,使用内容中的端口作为目的端口 */
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, cmd.l3num,
&ct->tuplehash[!dir].tuple.src.u3, daddr,
IPPROTO_TCP, NULL, &cmd.u.tcp.port);
/*
 * nf_ct_expect_init - fill in the tuple and mask of an expectation.
 * @exp:    expectation to initialise
 * @class:  value stored in exp->class
 * @family: address family; AF_INET selects a 4-byte address length, any
 *          other value is treated as a 16-byte address
 * @saddr:  expected source address, or NULL to wildcard the source address
 * @daddr:  expected destination address (copied unconditionally)
 * @proto:  layer-4 protocol number stored in the tuple
 * @src:    expected source port, or NULL to wildcard the source port
 * @dst:    expected destination port (dereferenced unconditionally)
 *
 * exp->flags, exp->expectfn and exp->helper are reset here, so an expectfn
 * is only present if something assigns it after this call.
 */
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
u_int8_t family,
const union nf_inet_addr *saddr,
const union nf_inet_addr *daddr,
u_int8_t proto, const __be16 *src, const __be16 *dst)
{
int len;
/* Address length in bytes: 4 for AF_INET, 16 otherwise. */
if (family == AF_INET)
len = 4;
else
len = 16;
exp->flags = 0;
exp->class = class;
exp->expectfn = NULL;
exp->helper = NULL;
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
/* Example used in the article: FTP active mode with the client behind NAT. */
if (saddr) {/* in that example: the server's public source IP, because the server opens the data connection in active mode */
memcpy(&exp->tuple.src.u3, saddr, len);
if (sizeof(exp->tuple.src.u3) > len)
/* address needs to be cleared for nf_ct_tuple_equal */
memset((void *)&exp->tuple.src.u3 + len, 0x00,
sizeof(exp->tuple.src.u3) - len);
/* Exact match on the first len bytes of the source address, zero beyond. */
memset(&exp->mask.src.u3, 0xFF, len);
if (sizeof(exp->mask.src.u3) > len)
memset((void *)&exp->mask.src.u3 + len, 0x00,
sizeof(exp->mask.src.u3) - len);
} else {/* no source address given: wildcard it (zero tuple, zero mask) */
memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
}
if (src) {/* source port; the FTP helper passes NULL because the data connection has not been opened yet */
exp->tuple.src.u.all = *src;
exp->mask.src.u.all = htons(0xFFFF);
} else {
exp->tuple.src.u.all = 0;
exp->mask.src.u.all = 0;
}
/* Destination address: in the FTP example this is the destination IP of the
 * master's reverse-direction tuple, i.e. the client's address after NAT. */
memcpy(&exp->tuple.dst.u3, daddr, len);
if (sizeof(exp->tuple.dst.u3) > len)
/* address needs to be cleared for nf_ct_tuple_equal */
memset((void *)&exp->tuple.dst.u3 + len, 0x00,
sizeof(exp->tuple.dst.u3) - len);
/* Destination port: in the FTP example, the port parsed from the PORT command.
 * NOTE(review): the original note calls this "the port after rewriting" --
 * confirm against the NAT helper, which is not shown here. */
exp->tuple.dst.u.all = *dst;
#ifdef CONFIG_NF_NAT_NEEDED
memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
#endif
}
/*
 * help - FTP conntrack helper callback for a tracked control connection.
 * @skb:     packet being inspected
 * @protoff: offset of the TCP header inside @skb
 * @ct:      conntrack entry the packet belongs to
 * @ctinfo:  conntrack state of this packet; the direction is derived from it
 *
 * Searches the TCP payload for the patterns in search[dir]. On a full match
 * it allocates an expectation, initialises it with nf_ct_expect_init() and
 * registers it: through nf_nat_ftp_hook when the hook is set and the
 * connection has a bit of IPS_NAT_MASK set, otherwise directly with
 * nf_ct_expect_related().
 *
 * Returns NF_ACCEPT or NF_DROP (or whatever the NAT hook returns). NF_DROP
 * is used for a partial pattern match, a failed expectation allocation and
 * a failed expectation registration.
 */
static int help(struct sk_buff *skb,
unsigned int protoff,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
unsigned int dataoff, datalen;
const struct tcphdr *th;
struct tcphdr _tcph;
const char *fb_ptr;
int ret;
u32 seq;
int dir = CTINFO2DIR(ctinfo);
unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff);
struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
union nf_inet_addr *daddr;
struct nf_conntrack_man cmd = {};
unsigned int i;
int found = 0, ends_in_nl;
typeof(nf_nat_ftp_hook) nf_nat_ftp;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED &&
ctinfo != IP_CT_ESTABLISHED_REPLY) {
pr_debug("ftp: Conntrackinfo = %u\n", ctinfo);
return NF_ACCEPT;
}
th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* Payload starts after the TCP header (doff is in 32-bit words). */
dataoff = protoff + th->doff * 4;
/* No data? */
if (dataoff >= skb->len) {
pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff,
skb->len);
return NF_ACCEPT;
}
datalen = skb->len - dataoff;
/* nf_ftp_lock is held from here until the "out" label. */
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
BUG_ON(fb_ptr == NULL);
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
/* Sequence number just past this packet's payload. */
seq = ntohl(th->seq) + datalen;
/* Look up to see if we're just after a \n. */
if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
/* We're picking up this, clear flags and let it continue */
if (unlikely(ct_ftp_info->flags[dir] & NF_CT_FTP_SEQ_PICKUP)) {
ct_ftp_info->flags[dir] ^= NF_CT_FTP_SEQ_PICKUP;
goto skip_nl_seq;
}
/* Now if this ends in \n, update ftp info. */
pr_debug("nf_conntrack_ftp: wrong seq pos %s(%u) or %s(%u)\n",
ct_ftp_info->seq_aft_nl_num[dir] > 0 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][0],
ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)",
ct_ftp_info->seq_aft_nl[dir][1]);
ret = NF_ACCEPT;
goto out_update_nl;
}
skip_nl_seq:
/* Initialize IP/IPv6 addr to expected address (it's not mentioned
in EPSV responses) */
cmd.l3num = nf_ct_l3num(ct);
memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all));
/* Try each pattern registered for this direction; stop at the first
 * non-zero result (full match or partial match). */
for (i = 0; i < ARRAY_SIZE(search[dir]); i++) {
found = find_pattern(fb_ptr, datalen,
search[dir][i].pattern,
search[dir][i].plen,
search[dir][i].skip,
search[dir][i].term,
&matchoff, &matchlen,
&cmd,
search[dir][i].getnum);
if (found) break;
}
if (found == -1) {
/* We don't usually drop packets. After all, this is
connection tracking, not packet filtering.
However, it is necessary for accurate tracking in
this case. */
nf_ct_helper_log(skb, ct, "partial matching of `%s'",
search[dir][i].pattern);
ret = NF_DROP;
goto out;
} else if (found == 0) { /* No match */
ret = NF_ACCEPT;
goto out_update_nl;
}
pr_debug("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
matchlen, fb_ptr + matchoff,
matchlen, ntohl(th->seq) + matchoff);
exp = nf_ct_expect_alloc(ct);
if (exp == NULL) {
nf_ct_helper_log(skb, ct, "cannot alloc expectation");
ret = NF_DROP;
goto out;
}
/* We refer to the reverse direction ("!dir") tuples here,
* because we're expecting something in the other direction.
* Doesn't matter unless NAT is happening.
* Take the destination address of the reverse-direction tuple.
*/
daddr = &ct->tuplehash[!dir].tuple.dst.u3;
/* Update the ftp info */
if ((cmd.l3num == nf_ct_l3num(ct)) &&
memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all,
sizeof(cmd.u3.all))) {
/* Enrico Scholz's passive FTP to partially RNAT'd ftp
server: it really wants us to connect to a
different IP address. Simply don't record it for
NAT. */
if (cmd.l3num == PF_INET) {
pr_debug("NOT RECORDING: %pI4 != %pI4\n",
&cmd.u3.ip,
&ct->tuplehash[dir].tuple.src.u3.ip);
} else {
pr_debug("NOT RECORDING: %pI6 != %pI6\n",
cmd.u3.ip6,
ct->tuplehash[dir].tuple.src.u3.ip6);
}
/* Thanks to Cristiano Lincoln Mattos
<lincoln@cesar.org.br> for reporting this potential
problem (DMZ machines opening holes to internal
networks, or the packet filter itself). */
if (!loose) {
ret = NF_ACCEPT;
goto out_put_expect;
}
/* "loose" is set: expect the address announced in the payload instead. */
daddr = &cmd.u3;
}
/* Initialise the expectation: the source address is the source of the
 * reverse-direction tuple, the destination address is daddr (chosen above),
 * the source port is wildcarded (NULL) and the destination port is the port
 * parsed from the payload. */
nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, cmd.l3num,
&ct->tuplehash[!dir].tuple.src.u3, daddr,
IPPROTO_TCP, NULL, &cmd.u.tcp.port);
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
if (nf_ct_expect_related(exp) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
ret = NF_ACCEPT;
}
out_put_expect:
/* Drop the reference obtained from nf_ct_expect_alloc(). */
nf_ct_expect_put(exp);
out_update_nl:
/* Now if this ends in \n, update ftp info. Seq may have been
* adjusted by NAT code. */
if (ends_in_nl)
update_nl_seq(ct, seq, ct_ftp_info, dir, skb);
out:
spin_unlock_bh(&nf_ftp_lock);
return ret;
}
从下面语句:
/* Now, NAT might want to mangle the packet, and register the
* (possibly changed) expectation itself. */
nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook);
if (nf_nat_ftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype,
protoff, matchoff, matchlen, exp);
可以看出,如果主连接没有进行NAT的话,是不会设置exp->expectfn函数的。
被动模式
同样对于被动方式,客户端发送PASV命令,然后服务器端发送xxx,xxx,xxx,xxx,ppp,ppp给客户端。help函数捕获了该消息,然后创建expect连接。假设客户端主链接的IP为cip。
dip/mask = dataip/0xffffffff
dport/mask = dataport/0xffff
sip/mask = cip/0xffffffff
sport/mask = 0/0
protocol = tcp
好了,这里说一句,exp都是子连接请求方向的。
如果有NAT,那help和expect还会做什么了?
NAT在客户端侧,这种场景很常见
主动模式
客户端发送PORT xxx,xxx,xxx,xxx,ppp,ppp。NAT设备通过help函数捕获了该消息。因为客户端是一个私网地址,其port命令中的地址也是一个私网地址,如果直接发送给服务器端,那么服务器端将不能连接该地址。所以,NAT设备需要为数据连接选择一个公网地址(可以是和主链接一样的地址,也可以是别的地址)和一个新的端口号,进行地址转换,将转换后的地址重新填充到PORT命令中。假设客户端发送的PORT命令为PORT 10.10.10.10 10000。经过NAT设备时,给其替换成1.1.1.1 10000。那么服务器将会连接客户端的(1.1.1.1 10000)。NAT设备必须为子连接的请求方向构建期望连接,即为:
dip/mask = 1.1.1.1/0xffffffff
dport/mask = 10000/0xffff
sip/mask = rip/0xffffffff(由于源IP可以由服务器重新指定,所以这里直接设置母连接服务器端的IP地址也是不准确的,但是大多数正常情况是这样的)
sport/mask = 0/0
protocol = tcp
假设服务器端以2.2.2.2 20连接客户端1.1.1.1 10000。那么NAT将会为数据通道创建新的连接跟踪为:
请求方向
dip = 1.1.1.1
dport = 10000
sip = 2.2.2.2
sport = 20
protocol = tcp
应答方向:
dip = 2.2.2.2
dport = 20
sip = 1.1.1.1
sport = 10000
protocol = tcp
这样的连接跟踪创建后,请求方向的报文都可以找到对应的连接跟踪,但是客户端收到的请求报文是经过dnat方向操作后的报文:
dip = 10.10.10.10
dport = 10000
sip = 2.2.2.2
sport = 20
protocol = tcp
发送的回应报文为:
dip = 2.2.2.2
dport = 20
sip = 10.10.10.10
sport = 10000
protocol = tcp
是无法命中连接跟踪的。
所以对于这种情况下的子连接需要做两件事:
1.为子连接构建nat信息,在nat模块中将根据这些信息进行nat操作。
2.修正连接跟踪的应答方向五元组,使客户端报文能命中连接跟踪。
这两件事由谁来做了?
答案就是期望连接的expectfn函数
if (exp) {/* 执行期望函数 */
if (exp->expectfn)
exp->expectfn(ct, exp);
nf_ct_expect_put(exp);
}
对于ftp来说,该函数为nf_nat_follow_master。从上面可以知道,数据通道请求方向需要做DNAT。其中exp->dir的值为主连接设置的,该值为help函数收到PORT命令时的方向的反方向(主动模式为请求方向,那么反方向为应答方向),所以exp->dir的值为IP_CT_DIR_REPLY。
/*
 * nf_nat_follow_master - set up NAT on an expected conntrack so that it
 * follows its master connection.
 * @ct:  the newly created (expected) conntrack; must not have any bit of
 *       IPS_NAT_DONE_MASK set yet
 * @exp: the expectation that matched; exp->dir selects which master tuple
 *       the addresses are taken from, exp->saved_proto supplies the port
 *       for the destination mapping
 *
 * Calls nf_nat_setup_info() twice, first for the source manip and then for
 * the destination manip. The return values of both calls are ignored.
 */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_nat_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
/* Destination NAT: a single address plus the single saved port. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min_proto = range.max_proto = exp->saved_proto;
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.src.u3;/* active-mode example (exp->dir == IP_CT_DIR_REPLY): the master's original-direction source IP, i.e. the client's address on the control connection */
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);/* build the NAT info for the destination manip */
}
重点分析nat信息的构建
/*
 * nf_nat_setup_info - apply a NAT mapping of the given type and range to a
 * not-yet-confirmed conntrack.
 * @ct:        conntrack to set up
 * @range:     address/port range the manipulated part must map into
 * @maniptype: NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
 *
 * Computes the NATed original-direction tuple with get_unique_tuple(); if it
 * differs from the current one, the reply tuple is replaced with its inverse
 * and IPS_SRC_NAT or IPS_DST_NAT is set. The matching *_NAT_DONE bit is set
 * before returning successfully.
 *
 * Returns NF_ACCEPT on success, and also (without touching @ct) when @ct is
 * already confirmed. Returns NF_DROP when NAT of this type was already
 * initialised or when the seqadj extension cannot be added.
 */
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
struct net *net = nf_ct_net(ct);/* network namespace this conntrack belongs to */
struct nf_conntrack_tuple curr_tuple, new_tuple;
/* Can't setup nat info for confirmed ct. */
/* A confirmed connection is not set up again. */
if (nf_ct_is_confirmed(ct))
return NF_ACCEPT;
WARN_ON(maniptype != NF_NAT_MANIP_SRC &&
maniptype != NF_NAT_MANIP_DST);
if (WARN_ON(nf_nat_initialized(ct, maniptype)))
return NF_DROP;
/* What we've got will look like inverse of reply. Normally
* this is what is in the conntrack, except for prior
* manipulations (future optimization: if num_manips == 0,
* orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
* Derive the current original-direction tuple by inverting the reply tuple.
*/
nf_ct_invert_tuplepr(&curr_tuple,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
/* From that tuple, compute the original-direction tuple after NAT. */
get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
/* If the new tuple differs from the current one, the reply tuple must change too. */
if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
struct nf_conntrack_tuple reply;
/* Alter conntrack table so will recognize replies. */
/* Invert the new tuple to obtain the new reply-direction tuple. */
nf_ct_invert_tuplepr(&reply, &new_tuple);
/* Replace the conntrack's reply-direction tuple. */
nf_conntrack_alter_reply(ct, &reply);
/* Non-atomic: we own this at the moment. */
if (maniptype == NF_NAT_MANIP_SRC)
ct->status |= IPS_SRC_NAT;
else
ct->status |= IPS_DST_NAT;
/* If the connection has a helper but no seqadj extension yet, add one;
 * failing to add it drops the packet. */
if (nfct_help(ct) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
return NF_DROP;
}
/* For source NAT, link the conntrack into the nf_nat_bysource hash table;
 * the bucket is computed from the original-direction tuple. */
if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
spinlock_t *lock;
srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
hlist_add_head_rcu(&ct->nat_bysource,
&nf_nat_bysource[srchash]);
spin_unlock_bh(lock);
}
/* It's done. NAT setup for this manip type is complete. */
if (maniptype == NF_NAT_MANIP_DST)
ct->status |= IPS_DST_NAT_DONE;
else
ct->status |= IPS_SRC_NAT_DONE;
return NF_ACCEPT;
}
传入的参数ct为:
请求方向
dip = 1.1.1.1
dport = 10000
sip = 2.2.2.2
sport = 20
protocol = tcp
应答方向:
dip = 2.2.2.2
dport = 20
sip = 1.1.1.1
sport = 10000
protocol = tcp
语句:
nf_ct_invert_tuplepr(&curr_tuple,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
的意思是求ct的应答方向的反连接,即:
应答方向:
dip = 2.2.2.2
dport = 20
sip = 1.1.1.1
sport = 10000
protocol = tcp
的反连接,curr_tuple即为请求方向:
请求方向
dip = 1.1.1.1
dport = 10000
sip = 2.2.2.2
sport = 20
protocol = tcp
语句:
get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
* range. It might not be possible to get a unique tuple, but we try.
* At worst (or if we race), we will end up with a final duplicate in
* __ip_conntrack_confirm and drop the packet.
*
* @tuple:      output, the tuple after manipulation
* @orig_tuple: input, the tuple before manipulation
* @range:      range the manipulated part has to map into
* @ct:         conntrack the tuple belongs to
* @maniptype:  which side (source or destination) is manipulated
*
* The lookups run under rcu_read_lock(), released at the "out" label. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
const struct nf_nat_range *range,/* in the article's example: the client IP */
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)/* in the article's example: destination NAT */
{
const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
rcu_read_lock();
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
orig_tuple->dst.protonum);
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
*
* This is only required for source (ie. NAT/masq) mappings.
* So far, we don't do local source mappings, so multiple
* manips not an issue.
*/
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
if (in_range(l3proto, l4proto, orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
goto out;
}
} else if (find_appropriate_src(net, zone, l3proto, l4proto,
orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
goto out;
}
}
/* 2) Select the least-used IP/proto combination in the given range */
/* In the article's DNAT example this step rewrites the tuple's destination IP. */
*tuple = *orig_tuple;
find_best_ips_proto(zone, tuple, range, ct, maniptype);
/* 3) The per-protocol part of the manip is made to map into
* the range to make a unique tuple.
*/
/* Only bother mapping if it's not already in range and unique */
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (l4proto->in_range(tuple, maniptype,
&range->min_proto,
&range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
goto out;
} else if (!nf_nat_used_tuple(tuple, ct)) {
goto out;
}
}
/* Last change: get protocol to try to obtain unique tuple. */
/* NOTE(review): the original note says the port is not modified in the FTP
 * example -- confirm; it depends on l4proto->unique_tuple, not shown here. */
l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
out:
rcu_read_unlock();
}
/* For [FUTURE] fragmentation handling, we want the least-used
* src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
* if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
* 1-65535, we don't do pro-rata allocation based on ports; we choose
* the ip with the lowest src-ip/dst-ip/proto usage.
*
* In short: choose the address part of @tuple from @range.
*
* @zone:      conntrack zone; its id is mixed into the hash seed unless
*             NF_NAT_RANGE_PERSISTENT is set
* @tuple:     tuple whose source or destination address is rewritten in place
* @range:     address range to choose from; nothing is done unless
*             NF_NAT_RANGE_MAP_IPS is set
* @ct:        conntrack, used here only to determine the address family
* @maniptype: NF_NAT_MANIP_SRC rewrites the source address, anything else
*             rewrites the destination address
*/
static void
find_best_ips_proto(const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
union nf_inet_addr *var_ipp;
unsigned int i, max;
/* Host order */
u32 minip, maxip, j, dist;
bool full_range;
/* No IP mapping? Do nothing. */
if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
return;
if (maniptype == NF_NAT_MANIP_SRC)
var_ipp = &tuple->src.u3;
else
var_ipp = &tuple->dst.u3;/* destination manip: point at the tuple's destination address */
/* Fast path: only one choice. If the range holds a single address, use it; in the article's example that single address is the client IP. */
if (nf_inet_addr_cmp(&range->min_addr, &range->max_addr)) {
*var_ipp = range->min_addr;
return;
}
/* Index of the last 32-bit word of the address for this family. */
if (nf_ct_l3num(ct) == NFPROTO_IPV4)
max = sizeof(var_ipp->ip) / sizeof(u32) - 1;
else
max = sizeof(var_ipp->ip6) / sizeof(u32) - 1;
/* Hashing source and destination IPs gives a fairly even
* spread in practice (if there are a small number of IPs
* involved, there usually aren't that many connections
* anyway). The consistency means that servers see the same
* client coming from the same IP (some Internet Banking sites
* like this), even across reboots.
*/
j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
range->flags & NF_NAT_RANGE_PERSISTENT ?
0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
full_range = false;
/* Pick the address one 32-bit word at a time. */
for (i = 0; i <= max; i++) {
/* If first bytes of the address are at the maximum, use the
* distance. Otherwise use the full range.
*/
if (!full_range) {
minip = ntohl((__force __be32)range->min_addr.all[i]);
maxip = ntohl((__force __be32)range->max_addr.all[i]);
dist = maxip - minip + 1;
} else {
minip = 0;
dist = ~0;
}
var_ipp->all[i] = (__force __u32)
htonl(minip + reciprocal_scale(j, dist));
/* Once a word differs from the range maximum, later words use the full range. */
if (var_ipp->all[i] != range->max_addr.all[i])
full_range = true;
if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
j ^= (__force u32)tuple->dst.u3.all[i];
}
}
对curr_tuple进行nat构建,替换掉curr_tuple中的目的IP后,得到new_tuple:
dip = 10.10.10.10
dport = 10000
sip = 2.2.2.2
sport = 20
protocol = tcp
if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {//&new_tuple, &curr_tuple必然不相等,
struct nf_conntrack_tuple reply;
/* Alter conntrack table so will recognize replies. */
/* 根据新的五元组得到应答方向的新的五元组 */
nf_ct_invert_tuplepr(&reply, &new_tuple);//将new_tuple进行反转,得到reply
/* 替换应答方向的五元组 */
nf_conntrack_alter_reply(ct, &reply);//将reply作为子连接ct的应答方向五元组
/* Non-atomic: we own this at the moment. */
if (maniptype == NF_NAT_MANIP_SRC)
ct->status |= IPS_SRC_NAT;
else
ct->status |= IPS_DST_NAT;//同时设置需要进行目的NAT,nat模块看到该标志后将会将请求方向的目的IP改为应答方向的源IP。
/* 判断该连接是否存在help,如果存在则必须添加seq-adj扩展功能,数据通道没有help,不需要进行seqadj。 */
if (nfct_help(ct) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
return NF_DROP;
}
reply为:
dip = 2.2.2.2
dport = 20
sip = 10.10.10.10
sport = 10000
protocol = tcp
这个时候的reply就能命中客户端数据通道的应答了,bingo!
同时设置需要进行目的NAT的标志(ct->status |= IPS_DST_NAT)。nat模块看到该标志后会将请求方向的目的IP改为应答方向的源IP,完成报文的处理。应答方向报文看到该标志后,进行目的NAT的反操作,将应答报文的源IP改为请求方向的目的IP。
被动模式
服务器端发送 xxx,xxx,xxx,xxx,ppp,ppp。NAT设备通过help函数捕获了该消息。该地址是一个公网地址,NAT设备不需要对ftp的内容进行NAT转换(注意是不对内容进行nat转换的,控制通道依然需要进行SNAT转换),NAT设备会直接将该报文的内容发送给客户端。假设服务器端发送命令为2.2.2.2 10000。NAT设备必须为子连接的请求方向构建期望连接,即为:
dip/mask = 2.2.2.2/0xffffffff
dport/mask = 10000/0xffff
sip/mask = rip/0xffffffff //rip为主连接的请求方向源IP,即客户端IP
sport/mask = 0/0 //源端口暂时不知道
protocol = tcp
假设客户端以10.10.10.10 5000连接服务器端2.2.2.2 10000。那么NAT将会为数据通道创建新的连接跟踪为:
请求方向
dip = 2.2.2.2
dport = 10000
sip = 10.10.10.10
sport = 5000
protocol = tcp
应答方向:
dip = 10.10.10.10
dport = 5000
sip = 2.2.2.2
sport = 10000
protocol = tcp
这样的连接跟踪创建后,是无法命中应答方向的报文的:
dip = 1.1.1.1
dport = 5000
sip = 2.2.2.2
sport = 10000
protocol = tcp
所以需要修正应答方向的五元组。
其中exp->dir的值为主连接设置的,该值为help函数收到服务器发送的端口应答时的方向的反方向(被动模式为应答方向,那么反方向为请求方向),所以exp->dir的值为IP_CT_DIR_ORIGINAL。
/*
 * nf_nat_follow_master - set up NAT on an expected conntrack so that it
 * follows its master connection (annotated here for the passive-mode
 * example, where exp->dir == IP_CT_DIR_ORIGINAL).
 * @ct:  the newly created (expected) conntrack; must not have any bit of
 *       IPS_NAT_DONE_MASK set yet
 * @exp: the expectation that matched; exp->dir selects which master tuple
 *       the addresses are taken from, exp->saved_proto supplies the port
 *       for the destination mapping
 *
 * Calls nf_nat_setup_info() twice, first for the source manip and then for
 * the destination manip. The return values of both calls are ignored.
 */
/* If we fail to get a free NAT slot, we'll get dropped on confirm */
void nf_nat_follow_master(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
struct nf_nat_range range;
/* This must be a fresh one. */
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
range.flags = NF_NAT_RANGE_MAP_IPS;/* source NAT: map the IP only */
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.dst.u3;/* passive-mode example: !exp->dir is the reply direction, so this is the master's reply-direction destination IP */
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);/* apply the source NAT */
/* For DST manip, map port here to where it's expected. */
/* Destination NAT: a single address plus the single saved port. */
range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
range.min_proto = range.max_proto = exp->saved_proto;
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.src.u3;/* passive-mode example: the master's reply-direction source IP (presumably the server's address, 2.2.2.2 in the example -- confirm) */
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);/* build the NAT info for the destination manip */
}
后面的推理跟主动模式差不多。
**粗体** _斜体_ [链接](http://example.com) `代码` - 列表 > 引用
。你还可以使用@
来通知其他用户。