Linux内核中的网络协议栈详解
引言
网络协议栈是Linux内核中负责网络通信的核心组件。内核主要实现链路层、网络层和传输层的各种协议,并通过socket接口为用户空间的应用层程序提供服务;物理层则由网卡硬件及其驱动负责。Linux网络协议栈采用分层架构,各层协议之间相对独立,便于扩展和维护。本文将深入探讨Linux内核中的网络协议栈,包括其架构、实现和优化。
网络协议栈的架构
1. 网络协议栈的层次结构
应用层
↓
传输层(TCP/UDP)
↓
网络层(IP)
↓
链路层(Ethernet)
↓
物理层
2. Linux网络协议栈的组件
- socket层:提供用户空间接口
- 传输层:实现TCP、UDP等协议
- 网络层:实现IP、ICMP等协议
- 链路层:实现以太网等协议
- 设备驱动:与物理设备交互
3. 网络协议栈的核心结构
#include <linux/net.h>
/*
 * Abridged view of struct sock, the network-layer representation of a
 * socket.  Only a few representative members are listed here.
 */
struct sock {
	struct socket_wq	*sk_wq;			/* wait-queue wrapper */
	struct sk_buff_head	sk_receive_queue;	/* sk_buffs received for this socket */
	struct sk_buff_head	sk_write_queue;		/* sk_buffs queued for sending */
	struct sock_common	__sk_common;		/* embedded common part */
	int			sk_rcvbuf;		/* receive buffer size setting */
	int			sk_sndbuf;		/* send buffer size setting */
	/* other fields... */
};
/*
 * Abridged view of struct sk_buff, the packet buffer passed between the
 * layers of the stack.  Only a few representative members are listed.
 */
struct sk_buff {
	/* Links to the neighbouring buffers. */
	struct sk_buff		*next, *prev;

	struct sock		*sk;	/* associated socket */
	struct net_device	*dev;	/* associated network device */

	/* Length fields. */
	unsigned int		len, data_len;

	/* Pointers into the buffer. */
	unsigned char		*head, *data, *tail, *end;
	/* other fields... */
};
网络设备子系统
1. 网络设备的结构
#include <linux/netdevice.h>
/*
 * Abridged view of struct net_device, the kernel object describing one
 * network interface.  Only a few representative members are listed.
 *
 * The operation tables are referenced through pointers to const: drivers
 * supply read-only tables and the core never writes through them.
 */
struct net_device {
	char				name[IFNAMSIZ];		/* interface name */
	unsigned long			state;			/* device state bits */
	const struct net_device_ops	*netdev_ops;		/* driver callbacks */
	const struct ethtool_ops	*ethtool_ops;		/* ethtool callbacks */
	unsigned int			mtu;
	unsigned short			type;			/* hardware type */
	unsigned int			flags;			/* IFF_* interface flags */
	unsigned char			dev_addr[MAX_ADDR_LEN];	/* hardware address */
	struct net_device_stats		stats;			/* traffic counters */
	struct napi_struct		*napi;
	/* other fields... */
};
2. 网络设备的操作
#include <linux/netdevice.h>
/*
 * Abridged view of struct net_device_ops: the table of callbacks a driver
 * provides for its net_device.  Each hook receives the device it acts on.
 */
struct net_device_ops {
	/* Initialisation and teardown hooks. */
	int		(*ndo_init)(struct net_device *dev);
	void		(*ndo_uninit)(struct net_device *dev);

	/* Open and stop hooks. */
	int		(*ndo_open)(struct net_device *dev);
	int		(*ndo_stop)(struct net_device *dev);

	/* Transmit hook: receives one sk_buff and the device to send it on. */
	netdev_tx_t	(*ndo_start_xmit)(struct sk_buff *skb,
					  struct net_device *dev);

	/* Receive-mode and MAC-address configuration hooks. */
	void		(*ndo_set_rx_mode)(struct net_device *dev);
	int		(*ndo_set_mac_address)(struct net_device *dev, void *addr);
	/* other operations... */
};
3. NAPI机制
NAPI(New API)是Linux内核中用于高效处理网络数据包的机制:数据包到达时先由中断触发一次调度,随后驱动关闭该设备的接收中断,改由内核以轮询(poll)方式批量收包,处理完毕后再重新打开中断,从而降低高负载下的中断开销。
#include <linux/netdevice.h>
/*
 * Abridged view of struct napi_struct, the per-device (or per-queue)
 * polling context used by NAPI.
 */
struct napi_struct {
	struct list_head	poll_list;	/* list linkage for this context */
	struct net_device	*dev;		/* owning device */
	int			(*poll)(struct napi_struct *, int);	/* driver poll callback */
	int			weight;		/* weight passed at registration */
	/*
	 * State bits.  This has to be unsigned long because the kernel's
	 * atomic bit helpers (set_bit/test_bit/...) operate on unsigned long.
	 */
	unsigned long		state;
};
// NAPI operations
// netif_napi_add: takes the device, the napi_struct, the driver's poll callback and a weight
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight);
// Enable / disable a NAPI context
void napi_enable(struct napi_struct *napi);
void napi_disable(struct napi_struct *napi);
// Schedule a NAPI context / mark it complete
// NOTE(review): the return types of napi_schedule()/napi_complete() and the
// parameter list of netif_napi_add() differ between kernel releases; confirm
// against the target kernel's <linux/netdevice.h>.
int napi_schedule(struct napi_struct *napi);
void napi_complete(struct napi_struct *napi);
网络协议的实现
1. IP协议
#include <linux/ip.h>
/*
 * IPv4 header without options.  Multi-byte fields use the __be16/__be32
 * (big-endian) types.
 *
 * NOTE: ihl is declared before version, which is the bit-field order used
 * on little-endian ABIs; a portable definition must swap the two on
 * big-endian targets.
 */
struct iphdr {
	__u8	ihl:4,
		version:4;
	__u8	tos;
	__be16	tot_len;
	__be16	id;
	__be16	frag_off;
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
	__be32	saddr;
	__be32	daddr;
};
// IP handler functions
// ip_rcv takes the skb, the device, a packet_type and the original device
// NOTE(review): ip_output() takes additional arguments in recent kernels; confirm against the target tree
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
int ip_output(struct sk_buff *skb);
2. TCP协议
#include <linux/tcp.h>
/*
 * TCP header without options (20 bytes).
 *
 * The data offset, the reserved bits and the eight control flags share a
 * single 16-bit word, so every flag is a 1-bit field; declaring them as
 * whole bytes would inflate the header and misplace window/check/urg_ptr.
 *
 * NOTE: the bit-field order below is the little-endian one (res1 before
 * doff, fin first among the flags).  A portable definition must provide
 * the mirrored order for big-endian targets.
 */
struct tcphdr {
	__be16	source;		/* source port */
	__be16	dest;		/* destination port */
	__be32	seq;		/* sequence number */
	__be32	ack_seq;	/* acknowledgement number */
	__u16	res1:4,		/* reserved */
		doff:4,		/* header length in 32-bit words */
		fin:1,
		syn:1,
		rst:1,
		psh:1,
		ack:1,
		urg:1,
		ece:1,
		cwr:1;
	__be16	window;		/* receive window, network byte order */
	__sum16	check;		/* checksum */
	__be16	urg_ptr;	/* urgent pointer, network byte order */
};
// TCP handler functions
// tcp_v4_rcv takes a single skb; tcp_transmit_skb takes the socket, the skb,
// a clone flag and the allocation mask
int tcp_v4_rcv(struct sk_buff *skb);
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask);
3. UDP协议
#include <linux/udp.h>
/* UDP header: four 16-bit fields; the first three use the big-endian __be16 type. */
struct udphdr {
	__be16	source;		/* source port */
	__be16	dest;		/* destination port */
	__be16	len;		/* length field */
	__sum16	check;		/* checksum */
};
// UDP handler functions
// udp_rcv is a transport-layer handler called by the IP layer, so like
// tcp_v4_rcv it receives only the skb; the (skb, dev, pt, orig_dev) form is
// the link-layer packet_type handler signature used by ip_rcv.
int udp_rcv(struct sk_buff *skb);
int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
网络套接字
1. 套接字的结构
#include <linux/net.h>
/*
 * Abridged view of struct socket, the object that ties an open file to its
 * struct sock and its proto_ops table.
 */
struct socket {
	socket_state		state;	/* connection state */
	/* NOTE(review): ctime does not appear in mainline struct socket; confirm which tree this field comes from */
	ktime_t			ctime;
	struct file		*file;	/* associated file object */
	struct sock		*sk;	/* associated struct sock */
	const struct proto_ops	*ops;	/* socket operations */
};
/*
 * Abridged view of struct proto_ops, the per-address-family table of socket
 * operations that struct socket points to through ->ops.
 *
 * Two corrections compared with a naive listing:
 *  - family is a plain integer (the AF_* value), not a callback;
 *  - there is no create hook here: socket creation is dispatched through
 *    struct net_proto_family, and proto_ops only covers operations on an
 *    already existing socket, starting with release.
 */
struct proto_ops {
	int	family;
	int	(*release)(struct socket *sock);
	int	(*connect)(struct socket *sock, struct sockaddr *addr, int addr_len, int flags);
	int	(*socketpair)(struct socket *sock1, struct socket *sock2);
	/*
	 * NOTE(review): the parameter lists of accept and getname have changed
	 * across kernel releases; check the target kernel's <linux/net.h>.
	 */
	int	(*accept)(struct socket *sock, struct socket *newsock, int flags);
	int	(*getname)(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer);
	/* other operations... */
};
2. 套接字的系统调用
// Socket system-call entry points.
// Pointer parameters annotated with __user are declared as user-space pointers.
asmlinkage long sys_socket(int family, int type, int protocol);
asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen);
asmlinkage long sys_listen(int fd, int backlog);
asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen);
asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int addr_len);
asmlinkage long sys_recvfrom(int fd, void __user *buff, size_t len, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len);
3. 套接字的使用
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Minimal blocking TCP server: listens on port 8080, accepts one
 * connection, reads one request, replies with a fixed greeting and exits.
 *
 * Returns 0 on success and 1 if any socket call fails.  Every descriptor
 * that was opened is closed on all paths.
 */
int main(void) {
	static const char reply[] = "Hello, World!";
	struct sockaddr_in serv_addr, cli_addr;
	socklen_t clilen = sizeof(cli_addr);
	char buffer[256];
	int sockfd, newsockfd;
	int rc = 1;

	/* Create the listening socket. */
	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd < 0) {
		perror("socket");
		return 1;
	}

	/* Bind to every local address; zero the struct so padding is defined. */
	memset(&serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	serv_addr.sin_port = htons(8080);
	if (bind(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
		perror("bind");
		goto out_listener;
	}

	/* Start listening. */
	if (listen(sockfd, 5) < 0) {
		perror("listen");
		goto out_listener;
	}

	/* Accept one connection. */
	newsockfd = accept(sockfd, (struct sockaddr *)&cli_addr, &clilen);
	if (newsockfd < 0) {
		perror("accept");
		goto out_listener;
	}

	/* Read the request; one byte is kept spare as in the original call. */
	if (read(newsockfd, buffer, sizeof(buffer) - 1) < 0) {
		perror("read");
		goto out_conn;
	}

	/* Send the reply without its terminating NUL (13 bytes). */
	if (write(newsockfd, reply, sizeof(reply) - 1) < 0) {
		perror("write");
		goto out_conn;
	}

	rc = 0;

out_conn:
	close(newsockfd);
out_listener:
	close(sockfd);
	return rc;
}
网络性能优化
1. 网络参数调优
# TCP buffers
sysctl -w net.core.rmem_max=26214400
sysctl -w net.core.wmem_max=26214400
sysctl -w net.ipv4.tcp_rmem="4096 87380 26214400"
sysctl -w net.ipv4.tcp_wmem="4096 65536 26214400"
# TCP tuning
sysctl -w net.ipv4.tcp_fastopen=3
sysctl -w net.ipv4.tcp_slow_start_after_idle=0
sysctl -w net.ipv4.tcp_tw_reuse=1
sysctl -w net.ipv4.tcp_fin_timeout=15
sysctl -w net.ipv4.tcp_max_syn_backlog=65535
sysctl -w net.core.somaxconn=65535
# Network device tuning (offload features on eth0)
ethtool -K eth0 gro on
ethtool -K eth0 gso on
ethtool -K eth0 tso on
ethtool -K eth0 lro off
2. 网卡调优
# Show NIC information
ethtool eth0
# Set the MTU (ip from iproute2 replaces the deprecated net-tools ifconfig,
# which is no longer installed by default on many distributions)
ip link set dev eth0 mtu 9000
# Adjust interrupt affinity
echo "4,5,6,7" > /proc/irq/40/smp_affinity_list
# Adjust the number of RX/TX queues
ethtool -L eth0 rx 4 tx 4
# Adjust ring buffer sizes
ethtool -G eth0 rx 4096 tx 4096
3. 网络栈调优
# Enable BBR congestion control
sysctl -w net.ipv4.tcp_congestion_control=bbr
# Enable ECN
sysctl -w net.ipv4.tcp_ecn=1
# Adjust TCP timestamps
sysctl -w net.ipv4.tcp_timestamps=1
# Adjust keepalive parameters
sysctl -w net.ipv4.tcp_keepalive_time=600
sysctl -w net.ipv4.tcp_keepalive_probes=3
sysctl -w net.ipv4.tcp_keepalive_intvl=15
实际案例分析
1. 高性能服务器
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>

/*
 * Non-blocking accept loop: listens on port 8080 and answers every incoming
 * connection with a fixed greeting.
 *
 * The listening socket is non-blocking, so the loop waits in poll() until a
 * connection is pending instead of spinning on accept().  The loop only ends
 * on an unrecoverable error, hence the function always returns EXIT_FAILURE.
 */
int main(void) {
	static const char reply[] = "Hello, World!";
	struct sockaddr_in serv_addr, cli_addr;
	struct pollfd pfd;
	socklen_t clilen;
	int sockfd, newsockfd;
	int flags;

	/* Create the listening socket. */
	sockfd = socket(AF_INET, SOCK_STREAM, 0);
	if (sockfd < 0) {
		perror("socket");
		return EXIT_FAILURE;
	}

	/* Switch it to non-blocking mode. */
	flags = fcntl(sockfd, F_GETFL, 0);
	if (flags < 0 || fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) < 0) {
		perror("fcntl");
		goto out;
	}

	/* Bind to every local address; zero the struct so padding is defined. */
	memset(&serv_addr, 0, sizeof(serv_addr));
	serv_addr.sin_family = AF_INET;
	serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
	serv_addr.sin_port = htons(8080);
	if (bind(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0) {
		perror("bind");
		goto out;
	}

	/* Start listening. */
	if (listen(sockfd, 1024) < 0) {
		perror("listen");
		goto out;
	}

	pfd.fd = sockfd;
	pfd.events = POLLIN;

	for (;;) {
		/* Sleep until a connection is pending. */
		if (poll(&pfd, 1, -1) < 0) {
			if (errno == EINTR) {
				continue;
			}
			perror("poll");
			break;
		}

		/* accept() overwrites clilen, so it must be reset for every call. */
		clilen = sizeof(cli_addr);
		newsockfd = accept(sockfd, (struct sockaddr *)&cli_addr, &clilen);
		if (newsockfd < 0) {
			/* Transient conditions: nothing pending any more, or the peer gave up. */
			if (errno == EAGAIN || errno == EWOULDBLOCK ||
			    errno == EINTR || errno == ECONNABORTED) {
				continue;
			}
			perror("accept");
			break;
		}

		/*
		 * Handle the connection.  MSG_NOSIGNAL keeps a peer that already
		 * closed its end from killing the server with SIGPIPE.
		 */
		if (send(newsockfd, reply, sizeof(reply) - 1, MSG_NOSIGNAL) < 0) {
			perror("send");
		}
		close(newsockfd);
	}

out:
	close(sockfd);
	return EXIT_FAILURE;
}
2. 网络驱动优化
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
// The single net_device owned by this module: allocated and registered in
// my_netdev_init(), unregistered and freed in my_netdev_exit().
static struct net_device *my_dev;
/*
 * ndo_open callback: logs that the interface was opened and starts its
 * transmit queue.  Always returns 0.
 */
static int my_open(struct net_device *dev)
{
	printk(KERN_INFO "%s: opened\n", dev->name);
	netif_start_queue(dev);

	return 0;
}
/*
 * ndo_stop callback: logs that the interface was stopped and stops its
 * transmit queue.  Always returns 0.
 */
static int my_stop(struct net_device *dev)
{
	printk(KERN_INFO "%s: stopped\n", dev->name);
	netif_stop_queue(dev);

	return 0;
}
static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev) {
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
// 发送数据包
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
static struct net_device_ops my_netdev_ops = {
.ndo_open = my_open,
.ndo_stop = my_stop,
.ndo_start_xmit = my_start_xmit,
};
/*
 * Setup callback passed to alloc_netdev(): initialises a freshly allocated
 * device before it is registered.
 */
static void my_setup(struct net_device *dev)
{
	/*
	 * Fill in the generic Ethernet defaults (type, header ops, MTU,
	 * address length, broadcast address, flags) first.  Without this the
	 * device is registered with those fields left at zero.
	 */
	ether_setup(dev);

	dev->netdev_ops = &my_netdev_ops;
	dev->flags |= IFF_NOARP;
	dev->features |= NETIF_F_HW_CSUM;

	/* Assign a random MAC address. */
	eth_hw_addr_random(dev);
}
/*
 * Module entry point: allocates the demo device and registers it.
 *
 * Returns 0 on success, -ENOMEM if the device cannot be allocated, or the
 * error code reported by register_netdev() (propagated unchanged so the
 * real failure reason is not masked).
 */
static int __init my_netdev_init(void)
{
	int err;

	my_dev = alloc_netdev(0, "myeth%d", NET_NAME_UNKNOWN, my_setup);
	if (!my_dev)
		return -ENOMEM;

	err = register_netdev(my_dev);
	if (err) {
		free_netdev(my_dev);
		my_dev = NULL;	/* do not leave a dangling pointer behind */
		return err;
	}

	printk(KERN_INFO "Network device registered: %s\n", my_dev->name);
	return 0;
}
/* Module exit point: unregisters the demo device, then frees it. */
static void __exit my_netdev_exit(void)
{
	struct net_device *dev = my_dev;

	unregister_netdev(dev);
	free_netdev(dev);
}
// Register the module's init and exit functions
module_init(my_netdev_init);
module_exit(my_netdev_exit);
// Module metadata
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Demo");
MODULE_DESCRIPTION("Network device driver demo");
结论
Linux网络协议栈是一个复杂而强大的系统,它在内核中实现了链路层、网络层和传输层的各种协议,并通过socket接口为应用层提供服务。通过理解网络协议栈的架构和实现,我们可以更好地优化网络性能,开发高性能的网络应用。随着网络技术的不断发展,Linux网络协议栈也在不断演进,为各种网络应用提供更高效、更可靠的支持。
982

被折叠的 条评论
为什么被折叠?



