Linux内核中TCP的连接跟踪
1. 前言在2.6.1*以上的Linux内核中,关于TCP连接跟踪处理有了比较大的修改,增加了TCP可能标志位组合的检查;增加了通过序列号、确认号和窗口值来判断数据包合法性的功能,支持SACK选项;状态转换数组也进行了一些修改和完善,相应程序代码量增加不少。以下2.6内核代码版本为2.6.17.11。2. 通过确认号、序列号和窗口判断数据包合法性该思路提出比
·
1. 前言
在2.6.1*以上的Linux内核中,关于TCP连接跟踪处理有了比较大的修改,增加了TCP可能标志位组合的检查;增加了通过序列号、确认号和窗口值来判断数据包合法性的功能,支持SACK选项;状态转换数组也进行了一些修改和完善,相应程序代码量增加不少。
以下2.6内核代码版本为2.6.17.11。
2. 通过确认号、序列号和窗口判断数据包合法性
该思路提出比较早,最初是在“Real Stateful TCP Packet Filtering in IP Filter”中提出的( http://www.nluug.nl/events/sane2000/papers.html ),用在FreeBSD,OpenBSD,NetBSD等操作系统的防火墙IP Filter中。
原理:
TCP连接开始时进行3次握手,交换MSS等信息,同时在window字段中告诉对方本方的数据接收缓冲区大小,另一方发送数据时一次不能发送超过该大小的数据,也就是一方的序列号变化值不能超过对方提供的window大小,确认号的变化值是不能超过己方提供的window大小,这是正常TCP实现都会遵守的,如果不遵守这条件,说明该数据包非法。
使用该功能要注意两个TCP选项:第一,TCP的SACK(选择性确认)选项(RFC 2018、RFC 2883),在数据包丢失的情况下,使发送方只重新发送丢失的包而不是全部重发;第二,窗口扩大(window scale)选项(RFC 1323),该选项可将window值从16位最大扩展到30位。
为描述此功能新增加了一个数据结构:
/* include/linux/netfilter/nf_conntrack_tcp.h */
/*
 * Per-direction TCP window tracking state.  The connection tracker keeps
 * one instance for each direction of a connection and uses them to decide
 * whether a packet's sequence and acknowledgement numbers fall inside the
 * window.
 */
struct ip_ct_tcp_state {
	u_int32_t td_end;	/* max of seq + len */
	u_int32_t td_maxend;	/* max of ack + max(win, 1) */
	u_int32_t td_maxwin;	/* max(win) */
	u_int8_t td_scale;	/* window scale factor */
	u_int8_t loose;		/* used when connection picked up from the middle */
	u_int8_t flags;		/* per direction options */
};
判断一个TCP包序列号和确认号是否在给定window范围内的函数是tcp_in_window:
static int tcp_in_window(struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph)
{
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
__u32 seq, ack, sack, end, win, swin;
int res;
// 客户端发的第一个SYN包是到不了这个函数的,直接就接受了,
// 是从 连接的第2个包以后才进入本函数处理
/*
* Get the required data from the packet.
*/
// 序列号
seq = ntohl(tcph->seq);
// 确认号
ack = sack = ntohl(tcph->ack_seq);
// 本方窗口
win = ntohs(tcph->window);
// 本数据包结束序列号
end = segment_seq_plus_len(seq, skb->len, iph, tcph);
// 接收方支持SACK的话检查是否在TCP选项中有SACK
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(skb, iph, tcph, &sack);
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
struct iphdr *iph,
struct tcphdr *tcph)
{
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
__u32 seq, ack, sack, end, win, swin;
int res;
// 客户端发的第一个SYN包是到不了这个函数的,直接就接受了,
// 是从 连接的第2个包以后才进入本函数处理
/*
* Get the required data from the packet.
*/
// 序列号
seq = ntohl(tcph->seq);
// 确认号
ack = sack = ntohl(tcph->ack_seq);
// 本方窗口
win = ntohs(tcph->window);
// 本数据包结束序列号
end = segment_seq_plus_len(seq, skb->len, iph, tcph);
// 接收方支持SACK的话检查是否在TCP选项中有SACK
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(skb, iph, tcph, &sack);
// 省略号部分是一些调试打印信息,忽略下同
...
if (sender->td_end == 0) {
// 连接初始情况
/*
* Initialize sender data.
*/
if (tcph->syn && tcph->ack) {
// 服务器端
/*
* Outgoing SYN-ACK in reply to a SYN.
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
// 检查TCP选项,判断接收方是否支持SACK和窗口扩大
tcp_options(skb, iph, tcph, sender);
/*
* RFC 1323:
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
&& receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
// 不支持窗口扩大
sender->td_scale =
receiver->td_scale = 0;
} else {
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
sender->td_end = end;
sender->td_maxwin = (win == 0 ? 1 : win);
sender->td_maxend = end + sender->td_maxwin;
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
|| (state->state == TCP_CONNTRACK_SYN_RECV
&& dir == IP_CT_DIR_REPLY))
&& after(end, sender->td_end)) {
// 发送方重新发包
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
...
if (sender->td_end == 0) {
// 连接初始情况
/*
* Initialize sender data.
*/
if (tcph->syn && tcph->ack) {
// 服务器端
/*
* Outgoing SYN-ACK in reply to a SYN.
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
// 检查TCP选项,判断接收方是否支持SACK和窗口扩大
tcp_options(skb, iph, tcph, sender);
/*
* RFC 1323:
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
&& receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
// 不支持窗口扩大
sender->td_scale =
receiver->td_scale = 0;
} else {
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
sender->td_end = end;
sender->td_maxwin = (win == 0 ? 1 : win);
sender->td_maxend = end + sender->td_maxwin;
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
|| (state->state == TCP_CONNTRACK_SYN_RECV
&& dir == IP_CT_DIR_REPLY))
&& after(end, sender->td_end)) {
// 发送方重新发包
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(skb, iph, tcph, sender);
}
// 非ACK包和RST包,将确认号置为接收方的结束序列号
if (!(tcph->ack)) {
/*
* If there is no ACK, just pretend it was set and OK.
*/
ack = sack = receiver->td_end;
} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
(TCP_FLAG_ACK|TCP_FLAG_RST))
&& (ack == 0)) {
/*
* Broken TCP stacks, that set ACK in RST packets as well
* with zero ack value.
*/
ack = sack = receiver->td_end;
}
}
// 非ACK包和RST包,将确认号置为接收方的结束序列号
if (!(tcph->ack)) {
/*
* If there is no ACK, just pretend it was set and OK.
*/
ack = sack = receiver->td_end;
} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
(TCP_FLAG_ACK|TCP_FLAG_RST))
&& (ack == 0)) {
/*
* Broken TCP stacks, that set ACK in RST packets as well
* with zero ack value.
*/
ack = sack = receiver->td_end;
}
// 无数据包或起始包
if (seq == end
&& (!tcph->rst
|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
/*
* Packets contains no data: we assume it is valid
* and check the ack value only.
* However RST segments are always validated by their
* SEQ number, except when seq == 0 (reset sent answering
* SYN.
*/
seq = end = sender->td_end;
...
if (seq == end
&& (!tcph->rst
|| (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
/*
* Packets contains no data: we assume it is valid
* and check the ack value only.
* However RST segments are always validated by their
* SEQ number, except when seq == 0 (reset sent answering
* SYN.
*/
seq = end = sender->td_end;
...
// 检查序列号和确认号是否在合法范围内
if (sender->loose || receiver->loose ||
(before(seq, sender->td_maxend + 1) &&
after(end, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
// 合法包
/*
* Take into account window scaling (RFC 1323).
*/
// 窗口扩大调整
if (!tcph->syn)
win <<= sender->td_scale;
/*
* Update sender data.
*/
// 发送方窗口调整
swin = win + (sack - ack);
if (sender->td_maxwin < swin)
sender->td_maxwin = swin;
if (after(end, sender->td_end))
sender->td_end = end;
/*
* Update receiver data.
*/
// 接收方的参数调整
if (after(end, sender->td_maxend))
receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
receiver->td_maxend++;
}
if (sender->loose || receiver->loose ||
(before(seq, sender->td_maxend + 1) &&
after(end, sender->td_end - receiver->td_maxwin - 1) &&
before(sack, receiver->td_end + 1) &&
after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
// 合法包
/*
* Take into account window scaling (RFC 1323).
*/
// 窗口扩大调整
if (!tcph->syn)
win <<= sender->td_scale;
/*
* Update sender data.
*/
// 发送方窗口调整
swin = win + (sack - ack);
if (sender->td_maxwin < swin)
sender->td_maxwin = swin;
if (after(end, sender->td_end))
sender->td_end = end;
/*
* Update receiver data.
*/
// 接收方的参数调整
if (after(end, sender->td_maxend))
receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
receiver->td_maxend++;
}
/*
* Check retransmissions.
*/
// 判断是否是重发包
if (index == TCP_ACK_SET) {
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
state->retrans = 0;
}
}
/*
* Close the window of disabled window tracking :-)
*/
if (sender->loose)
sender->loose--;
res = 1;
} else {
...
// 对非法包的缺省策略,0拒绝,非0接受.该参数可通过/proc文件系统设置
res = ip_ct_tcp_be_liberal;
}
...
return res;
}
* Check retransmissions.
*/
// 判断是否是重发包
if (index == TCP_ACK_SET) {
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
state->retrans = 0;
}
}
/*
* Close the window of disabled window tracking :-)
*/
if (sender->loose)
sender->loose--;
res = 1;
} else {
...
// 对非法包的缺省策略,0拒绝,非0接受.该参数可通过/proc文件系统设置
res = ip_ct_tcp_be_liberal;
}
...
return res;
}
3. TCP状态转换表
这是2.6.1*中的新转换表:
/*
 * TCP state transition table of the 2.6.1x connection tracker.
 * As the row and column labels show, it is indexed by
 * [direction: ORIGINAL/REPLY][packet type: syn, synack, fin, ack, rst, none]
 * [state]; each entry presumably names the resulting state.  sIV marks an
 * invalid combination.
 */
static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/*            sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/     { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
/*synack*/  { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*fin*/     { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*ack*/     { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*rst*/     { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/*            sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
/*syn*/     { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
/*synack*/  { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
/*fin*/     { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*ack*/     { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*rst*/     { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
/*none*/    { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};
这是以前2.4.26中的转换表
/*
 * State transition table used by the 2.4.26 TCP connection tracker.
 * As the row and column labels show, it is indexed by
 * [direction: ORIGINAL/REPLY][packet type: syn, fin, ack, rst, none][state];
 * each entry presumably names the resulting state.  There is no separate
 * row for SYN/ACK packets, and sIV appears only in the "none" rows.
 */
static enum tcp_conntrack tcp_conntracks[2][5][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
/*syn*/ {sSS, sES, sSS, sSR, sSS, sSS, sSS, sSS, sSS, sLI },
/*fin*/ {sTW, sFW, sSS, sTW, sFW, sTW, sCL, sTW, sLA, sLI },
/*ack*/ {sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sLA, sES },
/*rst*/ {sCL, sCL, sSS, sCL, sCL, sTW, sCL, sCL, sCL, sCL },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
/* REPLY */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI */
/*syn*/ {sSR, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR },
/*fin*/ {sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI },
/*ack*/ {sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI },
/*rst*/ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sLA, sLI },
/*none*/{sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
};
该数组的解读方法在以前的文章中介绍过,不再赘述。
从两个数组对比可看到,增加了对SYNACK包的判断,同时数组中的sIV(非法状态)项也增加了很多,使得状态跟踪更加严格,但不足的是对ACK包还是太宽容,ACK扫描还是防不住。
4. TCP合法标志位组合
TCP的各个标志位的合法组合方式由下面的数组定义,数组每个元素为一种可能的组合方式,除了专门定义合法组合项为1,其他未定义的都属于非法项,值为0。
/*
 * Table of valid TCP flag combinations, indexed by the OR of the TH_* flag
 * bits.  Listed combinations are set to 1; every combination not listed is
 * left at 0 and is therefore treated as invalid.
 */
static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
{
	[TH_SYN]			= 1,
	[TH_SYN|TH_ACK]			= 1,
	[TH_SYN|TH_PUSH]		= 1,
	[TH_SYN|TH_ACK|TH_PUSH]		= 1,
	[TH_RST]			= 1,
	[TH_RST|TH_ACK]			= 1,
	[TH_RST|TH_ACK|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK]			= 1,
	[TH_ACK]			= 1,
	[TH_ACK|TH_PUSH]		= 1,
	[TH_ACK|TH_URG]			= 1,
	[TH_ACK|TH_URG|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK|TH_PUSH]		= 1,
	[TH_FIN|TH_ACK|TH_URG]		= 1,
	[TH_FIN|TH_ACK|TH_URG|TH_PUSH]	= 1,
};
5. 与netlink的接口
新的协议跟踪结构struct ip_conntrack_protocol中增加了4个和netlink接口相关函数,用于通过netlink套接口传递跟踪协议相关信息。
/*
 * Excerpt: the four netlink-related callbacks added to
 * struct ip_conntrack_protocol.
 */

/* convert protoinfo to nfnetlink attributes */
int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
		 const struct ip_conntrack *ct);

/* convert nfnetlink attributes to protoinfo */
int (*from_nfattr)(struct nfattr *tb[], struct ip_conntrack *ct);

/* judging by the names: convert a conntrack tuple to nfnetlink attributes ... */
int (*tuple_to_nfattr)(struct sk_buff *skb,
		       const struct ip_conntrack_tuple *t);
/* ... and nfnetlink attributes back to a conntrack tuple */
int (*nfattr_to_tuple)(struct nfattr *tb[],
		       struct ip_conntrack_tuple *t);
在TCP协议中对应函数为:
/* Excerpt: the functions the TCP tracker assigns to the four netlink callbacks. */
.to_nfattr = tcp_to_nfattr,
.from_nfattr = nfattr_to_tcp,
.tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
.nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
6. 结论
http://blog.chinaunix.net/uid-127037-id-2919474.html
2.6.1*的TCP协议跟踪处理比2.4考虑的因素增加了很多,这些新功能的使用可使系统的安全性进一步有所提高。
更多推荐
已为社区贡献4条内容
所有评论(0)