Linux内核网络源码走读之Netfilter( 三 )

接下来看一下方法nf_conntrack_in():
unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb)
    l3proto = __nf_ct_l3proto_find(pf); //对于pf=PF_INET,返回的是全局变量nf_conntrack_l3proto_ipv4
    l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); //对于IPv4,.get_l4proto=ipv4_get_l4proto
        *dataoff = nhoff + (iph->ihl << 2);
        *protonum = iph->protocol; //protonum即四层协议
    l4proto = __nf_ct_l4proto_find(pf, protonum); //以IPPROTO_TCP为例,返回的是全局变量nf_conntrack_l4proto_tcp4
    resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l3proto, l4proto);
        struct nf_conntrack_tuple tuple;
        struct nf_conntrack_tuple_hash *h;
        nf_ct_get_tuple() //填充tuple
        hash = hash_conntrack_raw(&tuple, net); //对tuple进行hash散列运算,调用的是内核提供的jhash2()
        h = __nf_conntrack_find_get(net, zone, &tuple, hash); //在全局hash表nf_conntrack_hash中查找连接是否存在
        if (!h) //如果连接不存在,则新建一个连接,保存到unconfirmed list
            h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff, hash);
        ct = nf_ct_tuplehash_to_ctrack(h); //利用container_of得到真正的连接对象
        ... //一系列ctinfo赋值逻辑,对于新建的连接ctinfo = IP_CT_NEW
        nf_ct_set(skb, ct, ctinfo); //将连接对象和连接状态值,保存到skb中
            skb->_nfct = (unsigned long)ct | info; //连接对象按8字节对齐,指针低3位一定为0(NFCT_INFOMASK为7),借此用低3位存连接状态值
    timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
    l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts); //以TCP为例,->packet==tcp_packet()

再看下ipv4_confirm()的代码:
ipv4_confirm
    nf_conntrack_confirm

static inline int nf_conntrack_confirm(struct sk_buff *skb)
    ct = nf_ct_get(skb, &ctinfo);
    ...
    nf_ct_del_from_dying_or_unconfirmed_list(ct); //从unconfirmed或dying表中删除连接
    ...
    __nf_conntrack_hash_insert(ct, hash, reply_hash); //插入到nf_conntrack_hash
    ...

iptables

iptables由内核部分和用户空间部分组成,核心是内核部分 。
iptables的字面意思就是ip表项,每个表由struct xt_table表示 。IPv4中,注册和注销表的接口是ipt_register_table()和ipt_unregister_table() 。
struct xt_table {
    struct list_head list;
    /* What hooks you will enter on */
    unsigned int valid_hooks;
    /* Man behind the curtain... */
    struct xt_table_info *private;
    struct module *me;
    u_int8_t af;    /* address/protocol family */
    int priority;   /* hook order */
    /* called when table is needed in the given netns */
    int (*table_init)(struct net *net);
    const char name[XT_TABLE_MAXNAMELEN];
};

int ipt_register_table(struct net *net, const struct xt_table *table,
                       const struct ipt_replace *repl,
                       const struct nf_hook_ops *ops, struct xt_table **res)
    xt_register_table(net, table, &bootstrap, newinfo);
        list_add(&table->list, &net->xt.tables[table->af]); //注册到net->xt.tables上
    nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); //注册netfilter钩子

struct net对象包含IPv4和IPv6专用对象netns_ipv4和netns_ipv6,netns_ipv4和netns_ipv6又包含指向xt_table对象的指针 。例如netns_ipv4包含iptable_filter、iptable_mangle、iptable_raw、arptable_filter、nat_table 。
我们以iptable_filter过滤表为例,来进一步看下iptables的工作原理 。
//filter表的定义
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
                            (1 << NF_INET_FORWARD) | \
                            (1 << NF_INET_LOCAL_OUT))

static const struct xt_table packet_filter = {
    .name       = "filter",
    .valid_hooks = FILTER_VALID_HOOKS, //按照FILTER_VALID_HOOKS定义,在netfilter的3个挂载点挂载钩子
    .me         = THIS_MODULE,
    .af         = NFPROTO_IPV4,
    .priority   = NF_IP_PRI_FILTER,
    .table_init = iptable_filter_table_init,
};

//初始化
static int __init iptable_filter_init(void)
    //这一步主要是初始化netfilter钩子挂载对象,3个挂载点的回调函数都是iptable_filter_hook
    filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
    register_pernet_subsys(&iptable_filter_net_ops)
        iptable_filter_net_init
            iptable_filter_table_init(net)
                //注册filter表
                ipt_register_table(net, &packet_filter, repl, filter_ops, &net->ipv4.iptable_filter);

总结下,内核提供了一些表,表里的条目由用户空间程序设置 。
看一个用户空间iptables命令例子:
iptables -A INPUT -p udp --dport=5001 -j LOG --log-level 1

这条规则的意思是,向filter表中添加一条规则,将目标端口为5001的UDP入站数据包转储到系统日志中 。使用iptables命令时,应使用修饰符-t来指定要使用的表,如果没指定,默认使用过滤表 。
再看一个规则:
iptables -A INPUT -p tcp -m conntrack --ctstate ESTABLISHED -j LOG --log-level 1

这个规则是根据连接跟踪状态来过滤数据包,将连接状态为ESTABLISHED的数据包转储到系统日志中 。


推荐阅读