Linux内核网络源码走读之Netfilter( 二 )


连接跟踪初始化

先看下连接跟踪模块定义的netfilter挂载点对象数组,即结构体struct nf_hook_ops数组,其中定义了在netfilter各挂载点上的处理函数 。
static const struct nf_hook_ops ipv4_conntrack_ops[] = {
    {
        .hook = ipv4_conntrack_in,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_PRE_ROUTING,
        .priority = NF_IP_PRI_CONNTRACK,
    },
    {
        .hook = ipv4_conntrack_local,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_LOCAL_OUT,
        .priority = NF_IP_PRI_CONNTRACK,
    },
    {
        .hook = ipv4_helper,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_POST_ROUTING,
        .priority = NF_IP_PRI_CONNTRACK_HELPER,
    },
    {
        .hook = ipv4_confirm,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_POST_ROUTING,
        .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
    },
    {
        .hook = ipv4_helper,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_LOCAL_IN,
        .priority = NF_IP_PRI_CONNTRACK_HELPER,
    },
    {
        .hook = ipv4_confirm,
        .pf = NFPROTO_IPV4,
        .hooknum = NF_INET_LOCAL_IN,
        .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
    },
};

注册的最重要的连接跟踪回调函数是NF_INET_PRE_ROUTING钩子回调函数ipv4_conntrack_in和NF_INET_LOCAL_OUT钩子回调函数ipv4_conntrack_local 。这两个钩子函数的优先级为NF_IP_PRI_CONNTRACK(-200),优先级较高 。ipv4_conntrack_in和ipv4_conntrack_local都会调用到nf_conntrack_in,下一小节走读nf_conntrack_in 。
继续看下注册这个ipv4_conntrack_ops的地方 。在内核版本4.9及以前,直接在函数nf_conntrack_l3proto_ipv4_init中调用nf_register_hooks来注册 。4.10及以后内核,不在nf_conntrack_l3proto_ipv4_init中直接注册ipv4_conntrack_ops,看下相关代码:
//nf_conntrack_l3proto_ipv4.c
//nf_conntrack_l3proto_ipv4_init为nf_conntrack_ipv4.ko的初始化函数
module_init(nf_conntrack_l3proto_ipv4_init);
static int __init nf_conntrack_l3proto_ipv4_init(void)
    ...
    ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
        rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); //注册到全局变量nf_ct_l3protos中

struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
    .l3proto = PF_INET,
    .pkt_to_tuple = ipv4_pkt_to_tuple,
    .invert_tuple = ipv4_invert_tuple,
    .get_l4proto = ipv4_get_l4proto,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
    .tuple_to_nlattr = ipv4_tuple_to_nlattr,
    .nlattr_to_tuple = ipv4_nlattr_to_tuple,
    .nla_policy = ipv4_nla_policy,
    .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */
                NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */
#endif
    .net_ns_get = ipv4_hooks_register, //这里注册的函数用于注册连接跟踪的netfilter钩子
    .net_ns_put = ipv4_hooks_unregister,
    .me = THIS_MODULE,
};

//先看下ipv4_hooks_register
static int ipv4_hooks_register(struct net *net)
    struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id);
    cnet->users++;
    if (cnet->users > 1)
        goto out_unlock; //只在第一次调用的时候往下走,之后的调用只是users计数+1
    //注册连接跟踪的netfilter钩子
    nf_register_net_hooks(net, ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));

//再看下调用nf_conntrack_l3proto_ipv4.net_ns_get的地方
int nf_ct_netns_get(struct net *net, u8 nfproto)
    if (nfproto == NFPROTO_INET)
        nf_ct_netns_do_get(net, NFPROTO_IPV4)
        nf_ct_netns_do_get(net, NFPROTO_IPV6)

static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
    const struct nf_conntrack_l3proto *l3proto;
    l3proto = __nf_ct_l3proto_find(nfproto); //对于NFPROTO_IPV4,这里返回的是nf_conntrack_l3proto_ipv4
    l3proto->net_ns_get(net); //调用net_ns_get

//调用nf_ct_netns_get的地方有很多,主要应该是通过nft_ct_get_init和nft_nat_init

下图展示了IPv4连接跟踪钩子函数在IPv4收发流程中的位置,其中绿色方块是netfilter的5个钩子挂载点,蓝色方块是连接跟踪模块注册的钩子函数:

Linux内核网络源码走读之Netfilter

文章插图
连接跟踪netfilter挂载点
用来区分特定方向上的流的结构体是struct nf_conntrack_tuple:
struct nf_conntrack_tuple {
    struct nf_conntrack_man src; //tuple的可操作部分

    /* 以下是tuple的固定部分 */
    struct {
        union nf_inet_addr u3;
        union {
            /* Add other protocols here. */
            __be16 all;
            struct {
                __be16 port;
            } tcp;
            struct {
                __be16 port;
            } udp;
            struct {
                u_int8_t type, code;
            } icmp;
            struct {
                __be16 port;
            } dccp;
            struct {
                __be16 port;
            } sctp;
            struct {
                __be16 key;
            } gre;
        } u;
        u_int8_t protonum; //protocol
        u_int8_t dir;
    } dst;
};

连接跟踪条目

struct nf_conn表示连接跟踪条目,即保存到连接跟踪hash表里的节点 。
struct nf_conn {struct nf_conntrack ct_general;spinlock_tlock;u16cpu;struct nf_conntrack_zone zone;struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; //hashlist节点unsigned long status;u32 timeout;possible_net_t ct_net;struct hlist_nodenat_bysource;/* all members below initialized via memset */struct { } __nfct_init_offset;struct nf_conn *master;u_int32_t mark;u_int32_t secmark;struct nf_ct_ext *ext;union nf_conntrack_proto proto;};


推荐阅读