Python多线程编程全面指南

最新推荐文章于 2026-06-12 09:04:21 发布

原创最新推荐文章于 2026-06-12 09:04:21 发布 · 1k 阅读

9 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

前言

在多核CPU成为主流的今天，有效地利用多线程技术可以显著提升程序的执行效率。Python作为一门广泛使用的高级编程语言，提供了多种实现多线程编程的方式。本文将深入探讨Python中的多线程编程，涵盖基本概念、常用模块以及实际应用场景。

1. 多线程基础概念

1.1 什么是线程

线程是操作系统能够进行运算调度的最小单位，被包含在进程之中，是进程中的实际运作单位。一个进程可以并发多个线程，每条线程并行执行不同的任务。

1.2 线程 vs 进程

进程：资源分配的基本单位，拥有独立的内存空间
线程：CPU调度的基本单位，共享进程的内存空间
线程间通信比进程间通信更高效
线程创建和切换的开销小于进程

1.3 Python的GIL（全局解释器锁）

Python的全局解释器锁（GIL）是一个重要的概念，它确保在任何时刻只有一个线程在执行Python字节码。这意味着：

I/O密集型任务适合使用多线程
CPU密集型任务可能无法从多线程中受益

2. threading模块详解

2.1 创建线程的两种方式

方式一：继承Thread类

python

import threading
import time

class MyThread(threading.Thread):
    def __init__(self, thread_id, name):
        threading.Thread.__init__(self)
        self.thread_id = thread_id
        self.name = name
    
    def run(self):
        print(f"线程 {self.name} 开始")
        print_time(self.name, 3)
        print(f"线程 {self.name} 结束")

def print_time(thread_name, delay):
    count = 0
    while count < 5:
        time.sleep(delay)
        count += 1
        print(f"{thread_name}: {time.ctime(time.time())}")

# 创建新线程
thread1 = MyThread(1, "Thread-1")
thread2 = MyThread(2, "Thread-2")

# 启动线程
thread1.start()
thread2.start()

# 等待线程结束
thread1.join()
thread2.join()

print("主线程退出")

方式二：直接使用Thread对象

python

import threading
import time

def worker(thread_name, delay):
    print(f"{thread_name} 开始执行")
    count = 0
    while count < 5:
        time.sleep(delay)
        count += 1
        print(f"{thread_name}: 执行次数 {count}")
    print(f"{thread_name} 执行完毕")

# 创建线程
t1 = threading.Thread(target=worker, args=("线程-1", 1))
t2 = threading.Thread(target=worker, args=("线程-2", 2))

# 启动线程
t1.start()
t2.start()

# 等待线程结束
t1.join()
t2.join()

print("所有线程执行完成")

2.2 线程同步

使用Lock实现同步

python

import threading
import time

class Counter:
    def __init__(self):
        self.value = 0
        self.lock = threading.Lock()
    
    def increment(self):
        with self.lock:
            current = self.value
            time.sleep(0.001)  # 模拟一些处理时间
            self.value = current + 1

def test_counter():
    counter = Counter()
    
    # 创建100个线程，每个线程增加计数器100次
    threads = []
    for _ in range(100):
        thread = threading.Thread(target=lambda: [counter.increment() for _ in range(100)])
        threads.append(thread)
        thread.start()
    
    # 等待所有线程完成
    for thread in threads:
        thread.join()
    
    print(f"最终计数器值: {counter.value}")

test_counter()

使用RLock（可重入锁）

python

import threading

class SharedResource:
    def __init__(self):
        self.rlock = threading.RLock()
        self.value = 0
    
    def operation1(self):
        with self.rlock:
            self.value += 1
            self.operation2()  # 可以重入
    
    def operation2(self):
        with self.rlock:  # 这里不会阻塞，因为已经是锁的持有者
            self.value += 2

resource = SharedResource()
threads = []

for i in range(5):
    t = threading.Thread(target=resource.operation1)
    threads.append(t)
    t.start()

for t in threads:
    t.join()

print(f"最终值: {resource.value}")

使用Semaphore控制并发数

python

import threading
import time
import random

# 最多允许3个线程同时访问
semaphore = threading.Semaphore(3)

def access_resource(thread_id):
    print(f"线程 {thread_id} 等待访问资源")
    with semaphore:
        print(f"线程 {thread_id} 获得访问权限")
        time.sleep(random.uniform(1, 3))
        print(f"线程 {thread_id} 释放资源")

threads = []
for i in range(10):
    t = threading.Thread(target=access_resource, args=(i,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()

使用Condition实现线程间通信

python

import threading
import time

class ProducerConsumer:
    def __init__(self):
        self.items = []
        self.condition = threading.Condition()
        self.max_size = 5
    
    def produce(self):
        for i in range(10):
            with self.condition:
                while len(self.items) >= self.max_size:
                    print("缓冲区已满，生产者等待")
                    self.condition.wait()
                
                item = f"产品-{i}"
                self.items.append(item)
                print(f"生产: {item}")
                
                # 通知消费者
                self.condition.notify()
            
            time.sleep(0.5)
    
    def consume(self):
        for i in range(10):
            with self.condition:
                while not self.items:
                    print("缓冲区为空，消费者等待")
                    self.condition.wait()
                
                item = self.items.pop(0)
                print(f"消费: {item}")
                
                # 通知生产者
                self.condition.notify()
            
            time.sleep(1)

pc = ProducerConsumer()

producer = threading.Thread(target=pc.produce)
consumer = threading.Thread(target=pc.consume)

producer.start()
consumer.start()

producer.join()
consumer.join()

使用Event进行线程间信号传递

python

import threading
import time

class Worker:
    def __init__(self):
        self.event = threading.Event()
    
    def wait_for_event(self, worker_id):
        print(f"工作者 {worker_id} 等待事件")
        self.event.wait()
        print(f"工作者 {worker_id} 收到事件，开始工作")
    
    def set_event(self):
        time.sleep(3)
        print("主线程设置事件")
        self.event.set()

worker = Worker()

threads = []
for i in range(3):
    t = threading.Thread(target=worker.wait_for_event, args=(i,))
    threads.append(t)
    t.start()

# 主线程设置事件
setter = threading.Thread(target=worker.set_event)
setter.start()

for t in threads:
    t.join()

setter.join()

2.3 线程池的使用

python

from concurrent.futures import ThreadPoolExecutor
import time
import random

def task(name, duration):
    print(f"任务 {name} 开始，预计耗时 {duration} 秒")
    time.sleep(duration)
    result = f"任务 {name} 完成，耗时 {duration} 秒"
    return result

def use_thread_pool():
    # 创建线程池，最大并发数为3
    with ThreadPoolExecutor(max_workers=3) as executor:
        # 提交任务到线程池
        futures = []
        for i in range(10):
            duration = random.uniform(1, 5)
            future = executor.submit(task, f"Task-{i}", duration)
            futures.append(future)
        
        # 获取结果
        for future in futures:
            result = future.result()
            print(result)

use_thread_pool()

3. 多线程实战应用

3.1 网络请求并行处理

python

import requests
from concurrent.futures import ThreadPoolExecutor
import time

def fetch_https://blog.csdn.net/url(url):
    try:
        response = requests.get(url, timeout=5)
        return f"{url}: 状态码 {response.status_code}, 内容长度 {len(response.text)}"
    except Exception as e:
        return f"{url}: 错误 {str(e)}"

def parallel_requests():
    urls = [
        "https://www.baidu.com",
        "https://www.taobao.com",
        "https://www.jd.com",
        "https://www.163.com",
        "https://www.sina.com.cn",
        "https://www.qq.com",
        "https://www.sohu.com"
    ]
    
    start_time = time.time()
    
    # 使用线程池并行请求
    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(fetch_url, urls))
    
    end_time = time.time()
    
    for result in results:
        print(result)
    
    print(f"\n总耗时: {end_time - start_time:.2f} 秒")

parallel_requests()

3.2 文件批量处理

python

import os
from concurrent.futures import ThreadPoolExecutor
import time

def process_file(file_path):
    """模拟文件处理操作"""
    try:
        # 模拟一些处理时间
        time.sleep(0.1)
        file_size = os.path.getsize(file_path)
        return f"处理完成: {file_path}, 大小: {file_size} 字节"
    except Exception as e:
        return f"处理失败: {file_path}, 错误: {str(e)}"

def batch_process_files(directory):
    # 获取目录下所有文件
    files = [os.path.join(directory, f) for f in os.listdir(directory) 
             if os.path.isfile(os.path.join(directory, f))]
    
    start_time = time.time()
    
    # 使用线程池并行处理文件
    with ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(process_file, files))
    
    end_time = time.time()
    
    for result in results:
        print(result)
    
    print(f"\n处理了 {len(files)} 个文件，总耗时: {end_time - start_time:.2f} 秒")

# 示例：处理当前目录下的文件
batch_process_files('.')

3.3 生产者-消费者模式实现

python

import threading
import time
import random
from queue import Queue

class ProducerConsumerPattern:
    def __init__(self, max_size=10):
        self.queue = Queue(maxsize=max_size)
        self.lock = threading.Lock()
        self.producer_done = False
    
    def producer(self, producer_id):
        for i in range(5):
            item = f"生产者{producer_id}-项目{i}"
            # 模拟生产时间
            time.sleep(random.uniform(0.1, 0.5))
            
            self.queue.put(item)
            with self.lock:
                print(f"生产: {item}, 队列大小: {self.queue.qsize()}")
        
        # 标记生产者完成
        if producer_id == 0:  # 只有第一个生产者设置完成标志
            self.producer_done = True
    
    def consumer(self, consumer_id):
        while True:
            try:
                # 设置超时，避免无限等待
                item = self.queue.get(timeout=1)
                # 模拟消费时间
                time.sleep(random.uniform(0.2, 0.8))
                
                with self.lock:
                    print(f"消费者{consumer_id} 消费: {item}")
                
                self.queue.task_done()
            except:
                # 如果队列为空且生产者已完成，则退出
                if self.producer_done and self.queue.empty():
                    break

def run_producer_consumer():
    pc = ProducerConsumerPattern(max_size=5)
    
    # 创建2个生产者和3个消费者
    producers = []
    consumers = []
    
    for i in range(2):
        p = threading.Thread(target=pc.producer, args=(i,))
        producers.append(p)
        p.start()
    
    for i in range(3):
        c = threading.Thread(target=pc.consumer, args=(i,))
        consumers.append(c)
        c.start()
    
    # 等待生产者完成
    for p in producers:
        p.join()
    
    # 等待队列中的所有任务被处理
    pc.queue.join()
    
    # 通知消费者退出
    for c in consumers:
        c.join()
    
    print("所有任务完成")

run_producer_consumer()