捕獲TCP/IP協議棧數據包的原理
本文轉載自微信公眾號「編程雜技」,作者 theanarkh 。轉載本文請聯系編程雜技公眾號。
wireshark或tcpdump相信大家都用過,這些工具看起來都很酷,因為我們平時都是在界面看到應用層的數據,這些工具居然可以讓我們看到tcp/ip協議棧每層的數據。本文介紹一下查看tcp/ip協議棧數據的方法。并實現一個簡陋的sniffer,通過nodejs暴露出來使用。我們先看實現。
- #include <stdio.h>
 - #include <errno.h>
 - #include <unistd.h>
 - #include <sys/socket.h>
 - #include <sys/types.h>
 - #include <linux/in.h>
 - #include <linux/if_ether.h>
 - #include <stdlib.h>
 - #include <node_api.h>
 - #define DATA_LEN 500
 - /*
 -  * N-API callback exported to JavaScript as "start".
 -  *
 -  * Opens a PF_PACKET/SOCK_RAW socket for ETH_P_IP frames and then loops
 -  * forever: each captured frame has its Ethernet and IPv4 header fields
 -  * printed to stdout, followed by the TCP header fields when the payload is
 -  * an unfragmented (or first-fragment) TCP segment.
 -  *
 -  * env:  N-API environment, used only to raise a JavaScript error.
 -  * info: call info; no JavaScript arguments are read.
 -  *
 -  * Returns NULL with a pending JavaScript exception when the socket cannot be
 -  * created or recvfrom() fails. Otherwise it never returns, so the calling
 -  * thread stays blocked in the capture loop.
 -  */
 - static napi_value start(napi_env env, napi_callback_info info) {
 -     int sockfd;
 -     int ipHeaderLen;
 -     int fragmentOffset;
 -     unsigned int seq;
 -     unsigned int ack;
 -     ssize_t bytes;
 -     /* unsigned char so that header bytes >= 0x80 are never sign-extended */
 -     unsigned char data[DATA_LEN];
 -     const unsigned char *macHeader;
 -     const unsigned char *ipHeader;
 -     const unsigned char *transportHeader;
 -
 -     (void)info;
 -     // Only frames whose EtherType is ETH_P_IP are requested. The original
 -     // author notes that early kernels used AF_INET where PF_PACKET is used now.
 -     sockfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP));
 -     if (sockfd < 0) {
 -         // Report the errno text, then raise a JS error instead of calling
 -         // exit(), which would terminate the whole Node.js process.
 -         perror("創建socket錯誤");
 -         napi_throw_error(env, NULL, "創建socket錯誤");
 -         return NULL;
 -     }
 -     while (1) {
 -         bytes = recvfrom(sockfd, data, DATA_LEN, 0, NULL, NULL);
 -         if (bytes < 0) {
 -             if (errno == EINTR) {
 -                 continue; // interrupted by a signal: just retry
 -             }
 -             perror("recvfrom");
 -             close(sockfd);
 -             napi_throw_error(env, NULL, "recvfrom failed");
 -             return NULL;
 -         }
 -         printf("讀到字節數:%zd\n", bytes);
 -         // An Ethernet header plus a minimal 20-byte IPv4 header must be present
 -         // before any of the fixed-offset fields below may be read.
 -         if (bytes < ETH_HLEN + 20) {
 -             continue;
 -         }
 -         macHeader = data;
 -         printf("MAC報文----------\n");
 -         // Ethernet II layout: destination MAC (bytes 0-5), source MAC (bytes 6-11),
 -         // EtherType (bytes 12-13).
 -         printf("源Mac地址: %02x:%02x:%02x:%02x:%02x:%02x\n",
 -                macHeader[6], macHeader[7], macHeader[8],
 -                macHeader[9], macHeader[10], macHeader[11]);
 -         printf("目的Mac地址: %02x:%02x:%02x:%02x:%02x:%02x\n",
 -                macHeader[0], macHeader[1], macHeader[2],
 -                macHeader[3], macHeader[4], macHeader[5]);
 -         printf("上層協議: %04x\n",
 -                (macHeader[12] << 8) + macHeader[13]);
 -         // Skip the Ethernet header
 -         ipHeader = data + ETH_HLEN;
 -         printf("IP報文--------\n");
 -         printf("ip協議版本:%d\n",
 -                (ipHeader[0] & 0xF0) >> 4);
 -         // IHL counts 32-bit words
 -         ipHeaderLen = (ipHeader[0] & 0x0F) << 2;
 -         printf("首部長度:%d\n",
 -                ipHeaderLen);
 -         printf("區分服務:%d\n",
 -                ipHeader[1]);
 -         printf("總長度:%d\n",
 -                (ipHeader[2] << 8) + ipHeader[3]);
 -         printf("標識:%d\n",
 -                (ipHeader[4] << 8) + ipHeader[5]);
 -         // Top 3 bits of byte 6 are the flags; the remaining 13 bits of
 -         // bytes 6-7 are the fragment offset (in 8-byte units).
 -         printf("標志:%d\n",
 -                (ipHeader[6] & 0xE0) >> 5);
 -         fragmentOffset = ((ipHeader[6] & 0x1F) << 8) | ipHeader[7];
 -         printf("片偏移:%d\n",
 -                fragmentOffset);
 -         printf("TTL:%d\n",
 -                ipHeader[8]);
 -         printf("上層協議:%d\n",
 -                ipHeader[9]);
 -         printf("首部校驗和:%02x%02x\n",
 -                ipHeader[10], ipHeader[11]);
 -         printf("源ip:%d.%d.%d.%d\n",
 -                ipHeader[12], ipHeader[13],
 -                ipHeader[14], ipHeader[15]);
 -         printf("目的ip:%d.%d.%d.%d\n",
 -                ipHeader[16], ipHeader[17],
 -                ipHeader[18], ipHeader[19]);
 -         // The fields below are TCP-specific. Skip anything that is not TCP,
 -         // is a non-first fragment (no TCP header in it), has an invalid IHL,
 -         // or does not contain a complete 20-byte TCP header.
 -         if (ipHeader[9] != IPPROTO_TCP || fragmentOffset != 0 ||
 -             ipHeaderLen < 20 || bytes < ETH_HLEN + ipHeaderLen + 20) {
 -             continue;
 -         }
 -         transportHeader = ipHeader + ipHeaderLen;
 -         printf("傳輸層報文-----------\n");
 -         printf("源端口:%d\n",
 -                (transportHeader[0] << 8) + transportHeader[1]);
 -         printf("目的端口:%d\n",
 -                (transportHeader[2] << 8) + transportHeader[3]);
 -         // Assemble the 32-bit big-endian numbers in unsigned arithmetic so that
 -         // a top byte >= 0x80 does not overflow a signed int.
 -         seq = ((unsigned int)transportHeader[4] << 24) |
 -               ((unsigned int)transportHeader[5] << 16) |
 -               ((unsigned int)transportHeader[6] << 8) |
 -               (unsigned int)transportHeader[7];
 -         ack = ((unsigned int)transportHeader[8] << 24) |
 -               ((unsigned int)transportHeader[9] << 16) |
 -               ((unsigned int)transportHeader[10] << 8) |
 -               (unsigned int)transportHeader[11];
 -         printf("序列號:%u\n", seq);
 -         printf("確認號:%u\n", ack);
 -         // Data offset counts 32-bit words
 -         printf("傳輸層首部長度:%d\n",
 -                ((transportHeader[12] & 0xF0) >> 4) * 4);
 -         printf("FIN:%d\n",
 -                transportHeader[13] & 0x01);
 -         printf("SYN:%d\n",
 -                (transportHeader[13] & 0x02) >> 1);
 -         printf("RST:%d\n",
 -                (transportHeader[13] & 0x04) >> 2);
 -         printf("PSH:%d\n",
 -                (transportHeader[13] & 0x08) >> 3);
 -         printf("ACK:%d\n",
 -                (transportHeader[13] & 0x10) >> 4);
 -         printf("URG:%d\n",
 -                (transportHeader[13] & 0x20) >> 5);
 -         printf("窗口大小:%d\n",
 -                (transportHeader[14] << 8) + transportHeader[15]);
 -     }
 - }
 - /*
 -  * Module initialiser: creates a JavaScript function backed by start() and
 -  * attaches it to the module's exports object under the name "start".
 -  *
 -  * Returns exports on success. If either N-API call fails, a JavaScript
 -  * error is thrown and NULL is returned, so func is never used uninitialised.
 -  */
 - napi_value Init(napi_env env, napi_value exports) {
 -     napi_value func = NULL;
 -     napi_status status;
 -
 -     status = napi_create_function(env,
 -                                   NULL,
 -                                   NAPI_AUTO_LENGTH,
 -                                   start,
 -                                   NULL,
 -                                   &func);
 -     if (status != napi_ok) {
 -         napi_throw_error(env, NULL, "failed to create the start function");
 -         return NULL;
 -     }
 -     status = napi_set_named_property(env, exports, "start", func);
 -     if (status != napi_ok) {
 -         napi_throw_error(env, NULL, "failed to export the start function");
 -         return NULL;
 -     }
 -     return exports;
 - }
 - NAPI_MODULE(NODE_GYP_MODULE_NAME, Init)
 
我們看到實現并不復雜,首先創建一個socket,然后接收socket上面的數據進行分析就行。上面的代碼可以捕獲到所有發給本機的tcp/ip包,下面我們看看效果(有些字段還沒有仔細處理)。
下面我們來看看底層的實現(2.6.13.1內核)。我們從socket函數的實現開始分析。
- asmlinkage long sys_socket(int family, int type, int protocol){ // socket() system-call entry as quoted by the article (abridged: no return statement is shown)
 - int retval;
 - struct socket *sock;
 - // Create the socket object for the requested family/type/protocol
 - retval = sock_create(family, type, protocol, &sock);
 - // Map the socket to a file descriptor to hand back to the user
 - retval = sock_map_fd(sock);
 - }
 
接著看sock_create。
- int sock_create(int family, int type, int protocol, struct socket **res){ // thin wrapper: forwards to __sock_create with 0 as the last (kern) argument
 - return __sock_create(family, type, protocol, res, 0);
 - }
 - static int __sock_create(int family, int type, int protocol, struct socket **res, int kern){ // abridged excerpt: the out_module_put label and the return path are not shown
 - int err;
 - struct socket *sock;
 - // Allocate a socket
 - if (!(sock = sock_alloc())) {
 - // ... (allocation-failure handling elided in this excerpt)
 - }
 - // Record the socket type
 - sock->type = type;
 - err = -EAFNOSUPPORT;
 - // Look up the handler set for this protocol family and call its create()
 - if ((err = net_families[family]->create(sock, protocol)) < 0)
 - goto out_module_put;
 - }
 
我們看到__sock_create的邏輯很簡單,根據協議簇拿到對應的函數集,然后執行其create函數。我們看看PF_PACKET協議簇對應的函數集。PF_PACKET協議簇通過packet_init注冊了對應的函數集。
- static int __init packet_init(void){ // registers packet_family_ops via sock_register (abridged: no return statement is shown)
 - sock_register(&packet_family_ops);
 - }
 - static struct net_proto_family packet_family_ops = { // handler set that packet_init passes to sock_register()
 - .family = PF_PACKET,
 - .create = packet_create, // presumably the create() that __sock_create reaches through net_families[family] -- confirm against sock_register
 - .owner = THIS_MODULE,
 - };
 
我們看到create函數的值是packet_create。
- static int packet_create(struct socket *sock, int protocol){ // create() handler for PF_PACKET sockets (abridged excerpt: error checks and the return are not shown)
 - struct sock *sk;
 - struct packet_sock *po;
 - int err;
 - // Allocate the sock (the article describes the allocation as a packet_sock structure)
 - sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
 - // Install the operations table
 - sock->ops = &packet_ops;
 - // Associate the socket with the sock
 - sock_init_data(sock, sk);
 - // Obtain the packet_sock for sk; per the article its first field is the struct sock
 - po = pkt_sk(sk);
 - sk->sk_family = PF_PACKET;
 - // Function that receives packets
 - po->prot_hook.func = packet_rcv;
 - po->prot_hook.af_packet_priv = sk;
 - if (protocol) { // the hook is registered only when a non-zero protocol was requested
 - po->prot_hook.type = protocol;
 - dev_add_pack(&po->prot_hook);
 - sock_hold(sk);
 - po->running = 1;
 - }
 - }
 
packet_create首先創建了一個packet_sock結構體并初始化,最后調用dev_add_pack。
- static struct list_head ptype_base[16]; // 16 list heads, indexed below by (host-order protocol type & 15)
 - void dev_add_pack(struct packet_type *pt){ // adds a packet_type hook to ptype_all or to one ptype_base bucket, under ptype_lock
 - int hash;
 - spin_lock_bh(&ptype_lock);
 - if (pt->type == htons(ETH_P_ALL)) { // ETH_P_ALL hooks go on the separate ptype_all list
 - netdev_nit++;
 - list_add_rcu(&pt->list, &ptype_all);
 - } else {
 - hash = ntohs(pt->type) & 15; // low 4 bits of the host-order type select the bucket
 - list_add_rcu(&pt->list, &ptype_base[hash]);
 - }
 - spin_unlock_bh(&ptype_lock);
 - }
 
我們看到dev_add_pack的邏輯是往ptype_base對應的隊列加入一個節點。接著我們看看網卡收到數據包的時候是如何處理的。
- int netif_receive_skb(struct sk_buff *skb){ // abridged excerpt: declarations of type, ptype, pt_prev and ret, and the return, are not shown
 - type = skb->protocol;
 - list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { // walk the bucket selected by the packet's protocol
 - if (ptype->type == type &&
 - (!ptype->dev || ptype->dev == skb->dev)) { // protocol must match; device must match only when the hook names one
 - if (pt_prev)
 - ret = deliver_skb(skb, pt_prev); // hand the packet to the previously matched hook
 - pt_prev = ptype;
 - }
 - }
 - ret = pt_prev->func(skb, skb->dev, pt_prev); // the last matched hook is invoked directly through its func pointer
 - }
 
netif_receive_skb的邏輯中會根據收到mac包中上層協議字段找到對應的處理函數,比如本文的packet。最后執行func。從剛才的create函數我們看到func的值是packet_rcv。
- static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) { // abridged excerpt: how sk is obtained and the return are not shown
 - __skb_queue_tail(&sk->sk_receive_queue, skb); // append the packet to the sock's receive queue
 - sk->sk_data_ready(sk, skb->len); // invoke the sock's data-ready callback with the packet length
 - }
 
packet_rcv首先把收到的數據包插入socket的接收隊列,然后調用sk_data_ready通知socket,對應函數是sock_def_readable。
- static void sock_def_readable(struct sock *sk, int len){ // data-ready callback quoted by the article: wakes waiters on sk->sk_sleep
 - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) // only when a wait queue exists and waitqueue_active() reports it active
 - wake_up_interruptible(sk->sk_sleep);
 - }
 
sock_def_readable會喚醒阻塞在該socket的進程。那么這個隊列里有什么呢?我們回到文章開始的代碼,我們創建socket后阻塞在recvfrom。recvfrom通過層層調用最后執行對應函數集的recvmsg。
- static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 - struct msghdr *msg, size_t len, int flags){ // abridged excerpt: sk and err are not declared in the quoted lines, and the copy-out/return are not shown
 - struct sk_buff *skb;
 - skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err); // noblock argument is non-zero when MSG_DONTWAIT is set in flags
 - }
 
packet_recvmsg從socket的接收隊列取出一個數據包,我們看看skb_recv_datagram。
- struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 - int noblock, int *err){ // abridged excerpt: the error variable and the no_packet label are not shown
 - struct sk_buff *skb;
 - long timeo;
 - /*
 - static inline long sock_rcvtimeo(const struct sock *sk, int noblock)
 - {
 - return noblock ? 0 : sk->sk_rcvtimeo;
 - }
 - Get how long to wait when no packet is queued (0 when noblock is set)
 - */
 - timeo = sock_rcvtimeo(sk, noblock);
 - do {
 - skb = skb_dequeue(&sk->sk_receive_queue);
 - // A packet was queued: return it
 - if (skb)
 - return skb;
 - // Nothing queued
 - error = -EAGAIN;
 - // Waiting is not allowed: leave immediately
 - if (!timeo)
 - goto no_packet;
 - // Otherwise wait, and retry while wait_for_packet() returns 0
 - } while (!wait_for_packet(sk, err, &timeo));
 - }
 
我們看到沒有數據包的時候會等待一段時間,我們看看這個時間是多少。
- sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; // receive timeout is assigned MAX_SCHEDULE_TIMEOUT, defined on the next line as LONG_MAX
 - #define MAX_SCHEDULE_TIMEOUT LONG_MAX
 
我們看到超時時間非常長,當然這個值我們可以通過setsockopt的SO_RCVTIMEO選項設置。接著我們看等待的邏輯wait_for_packet。
- #define DEFINE_WAIT(name) \
 - wait_queue_t name = { \
 - .private = current, \
 - .func = autoremove_wake_function, \
 - .task_list = LIST_HEAD_INIT((name).task_list), \
 - } /* declares a wait_queue_t named `name` whose private field is current and whose wake function is autoremove_wake_function */
 - // Abridged excerpt: queues the current process on sk->sk_sleep, sleeps in
 - // schedule_timeout() for up to *timeo_p, writes the remaining time back to
 - // *timeo_p, then removes the wait entry. As quoted it always returns 0; the
 - // checks that would jump to the out label are not shown.
 - static int wait_for_packet(struct sock *sk, int *err, long *timeo_p){
 - DEFINE_WAIT(wait);
 - prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
 - int error = 0;
 - *timeo_p = schedule_timeout(*timeo_p);
 - out:
 - finish_wait(sk->sk_sleep, &wait);
 - // The quoted listing omitted the terminating semicolon here
 - return error;
 - }
 
wait_for_packet首先把當前進程插入對應的等待隊列并修改進程狀態為非就緒(TASK_INTERRUPTIBLE)。
- void fastcall prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state){ // queues the wait entry on q and sets the current process state (excerpt as quoted by the article)
 - // Add the wait entry (the current process) at the tail of the queue, unless it is already linked
 - if (list_empty(&wait->task_list))
 - __add_wait_queue_tail(q, wait);
 - // Change the process state to the requested one
 - set_current_state(state);
 - }
 
接著執行進程調度schedule_timeout。
- fastcall signed long __sched schedule_timeout(signed long timeout){ // abridged excerpt: arms a timer for the current task, calls schedule(), returns the time left
 - struct timer_list timer;
 - unsigned long expire;
 - // Absolute expiry time: the requested timeout added to the current jiffies
 - expire = timeout + jiffies;
 - // Set up the timer
 - init_timer(&timer);
 - timer.expires = expire;
 - timer.data = (unsigned long) current;
 - timer.function = process_timeout;
 - // Start the timer
 - add_timer(&timer);
 - // Hand the CPU to the scheduler
 - schedule();
 - timeout = expire - jiffies; // time remaining once this task runs again
 - out:
 - return timeout < 0 ? 0 : timeout; // a negative remainder is reported as 0
 - }
 
以上就是實現捕獲tcp/ip協議棧數據包的底層原理。代碼倉庫:https://github.com/theanarkh/node-sniffer
















 
 
 


 
 
 
 