Your code looks perfectly fine, so why does the program run so slowly? Often, just a few lines of threading code can multiply your program's speed!
In programming we are always chasing faster execution. Imagine you need to download the contents of 100 web pages: downloading them one by one could take minutes or longer, but with multiple threads it might take only a dozen seconds!
Why do we need multithreading?
Let's start with a simple example: downloading several web pages.
```python
import time
import requests

def download_url(url):
    print(f"Starting download: {url}")
    response = requests.get(url)
    print(f"Finished download: {url}, length: {len(response.text)}")

urls = [
    'https://www.example.com',
    'https://www.example.org',
    'https://www.example.net',
    # more URLs...
]

# Single-threaded version
start_time = time.time()
for url in urls:
    download_url(url)
print(f"Single-threaded time: {time.time() - start_time:.2f}s")
```
Run this code and you will see the total time grow linearly with the number of URLs. That is exactly why we need multithreading!
Python multithreading basics
Python provides multithreading through the threading module. The basic way to create threads:
```python
import threading

def print_numbers():
    for i in range(5):
        print(i)

def print_letters():
    for letter in 'ABCDE':
        print(letter)

# Create the threads
t1 = threading.Thread(target=print_numbers)
t2 = threading.Thread(target=print_letters)

# Start them
t1.start()
t2.start()

# Wait for both to finish
t1.join()
t2.join()
print("All threads finished!")
```
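Thread also accepts positional arguments via `args` and a `name` label, which is handy for debugging. A minimal sketch (the `greet` function and the thread name here are made up for illustration):

```python
import threading

results = []

def greet(name, times):
    # current_thread().name reports which thread is running this call
    for _ in range(times):
        results.append(f"{threading.current_thread().name}->{name}")

# args passes positional arguments; name labels the thread for logs/debugging
t = threading.Thread(target=greet, args=("Python", 2), name="greeter-1")
t.start()
t.join()
print(results)  # ['greeter-1->Python', 'greeter-1->Python']
```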
Scenario 1: network-bound tasks
Network requests spend most of their time waiting for a response, which makes them a perfect fit for multithreading.
```python
import threading
import requests
import time

def download_url(url):
    response = requests.get(url)
    print(f"{url} downloaded, length: {len(response.text)}")

urls = [
    'https://www.baidu.com',
    'https://www.taobao.com',
    'https://www.jd.com',
    'https://www.163.com',
    'https://www.sina.com.cn'
]

# Download with one thread per URL
start_time = time.time()
threads = []
for url in urls:
    thread = threading.Thread(target=download_url, args=(url,))
    threads.append(thread)
    thread.start()

for thread in threads:
    thread.join()
print(f"Multithreaded download time: {time.time() - start_time:.2f}s")
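One thread per URL stops scaling once the list gets long. If you do manage threads by hand, a threading.Semaphore can cap how many run at once. A sketch, with time.sleep standing in for the real requests.get call (the URL strings and the limit of 2 are assumptions for illustration):

```python
import threading
import time

sem = threading.Semaphore(2)       # allow at most 2 downloads at a time
active = []
peak = 0
peak_lock = threading.Lock()

def limited_download(url):
    global peak
    with sem:                      # blocks until a slot is free
        with peak_lock:
            active.append(url)
            peak = max(peak, len(active))
        time.sleep(0.1)            # stand-in for requests.get(url)
        with peak_lock:
            active.remove(url)

threads = [threading.Thread(target=limited_download, args=(f"url{i}",))
           for i in range(6)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(f"peak concurrency: {peak}")
```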
A more elegant approach: thread pools
Managing threads by hand is tedious. concurrent.futures makes it simpler and more efficient:
```python
from concurrent.futures import ThreadPoolExecutor
import requests
import time

def download_url(url):
    response = requests.get(url)
    return len(response.text)

urls = [
    'https://www.baidu.com',
    'https://www.taobao.com',
    'https://www.jd.com',
    'https://www.163.com',
    'https://www.sina.com.cn'
]

# Use a thread pool
start_time = time.time()
with ThreadPoolExecutor(max_workers=5) as executor:
    results = executor.map(download_url, urls)
    for url, length in zip(urls, results):
        print(f"{url} downloaded, length: {length}")
print(f"Thread pool download time: {time.time() - start_time:.2f}s")
```
Scenario 2: file processing
When processing many files, multithreading can noticeably improve throughput:
```python
from concurrent.futures import ThreadPoolExecutor

def process_file(filename):
    """Simulate processing one file."""
    print(f"Start processing: {filename}")
    # Simulated time-consuming work
    with open(filename, 'r') as f:
        content = f.read()
    # The actual processing would go here
    processed = content.upper()
    with open(f"processed_{filename}", 'w') as f:
        f.write(processed)
    print(f"Done processing: {filename}")
    return len(processed)

# Create some test files
for i in range(5):
    with open(f"file_{i}.txt", 'w') as f:
        f.write(f"This is the content of test file {i}")

# Process the files with a thread pool
file_list = [f"file_{i}.txt" for i in range(5)]
with ThreadPoolExecutor(max_workers=3) as executor:
    results = executor.map(process_file, file_list)
print("All files processed!")
```
Scenario 3: image processing
Image processing is largely CPU-bound, so the GIL limits how much threads can help; still, for batch jobs a good share of the time goes into reading and writing image files, so threads remain useful:
```python
from concurrent.futures import ThreadPoolExecutor
from PIL import Image

def process_image(image_path):
    """Process a single image."""
    print(f"Processing image: {image_path}")
    try:
        with Image.open(image_path) as img:
            # Simple processing: resize and convert to grayscale
            img = img.resize((800, 600))
            gray_img = img.convert('L')
            # Save the processed image
            new_path = f"processed_{image_path}"
            gray_img.save(new_path)
        print(f"Done processing: {image_path}")
        return True
    except Exception as e:
        print(f"Failed to process {image_path}: {e}")
        return False

# Assume we have some image files
image_files = ['image1.jpg', 'image2.jpg', 'image3.jpg']  # replace with real files

# Process the images with a thread pool
with ThreadPoolExecutor(max_workers=2) as executor:
    results = list(executor.map(process_image, image_files))
success_count = sum(results)
print(f"Successfully processed {success_count}/{len(image_files)} images")
```
A crucial caveat: thread safety
Multithreaded code has to guard shared data against race conditions:
```python
import threading

# Unsafe: counter += 1 is not atomic, so concurrent updates can be lost
counter = 0

def unsafe_increment():
    global counter
    for _ in range(100000):
        counter += 1

# Safe: serialize the updates with a lock
safe_counter = 0
lock = threading.Lock()

def safe_increment():
    global safe_counter
    for _ in range(100000):
        with lock:
            safe_counter += 1

# Test the unsafe version
threads = []
for _ in range(5):
    t = threading.Thread(target=unsafe_increment)
    threads.append(t)
    t.start()
for t in threads:
    t.join()
print(f"Unsafe count: {counter}")  # may come out below 500000

# Test the safe version
threads = []
for _ in range(5):
    t = threading.Thread(target=safe_increment)
    threads.append(t)
    t.start()
for t in threads:
    t.join()
print(f"Safe count: {safe_counter}")  # always 500000
```
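Besides explicit locks, queue.Queue is a thread-safe channel that does its own internal locking, which is often the simpler tool for producer-consumer patterns. A sketch (the worker count of 3 and the squaring task are arbitrary choices for illustration):

```python
import queue
import threading

tasks = queue.Queue()
done = queue.Queue()          # queue.Queue handles its own locking

def worker():
    while True:
        item = tasks.get()
        if item is None:      # sentinel value: time to exit
            break
        done.put(item * item)

threads = [threading.Thread(target=worker) for _ in range(3)]
for t in threads:
    t.start()
for n in range(10):
    tasks.put(n)
for _ in threads:
    tasks.put(None)           # one sentinel per worker
for t in threads:
    t.join()

results = sorted(done.get() for _ in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
```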
A real performance comparison
Let's compare single-threaded and multithreaded execution:
```python
from concurrent.futures import ThreadPoolExecutor
import time

def task(n):
    """Simulate a slow task."""
    time.sleep(1)  # stand-in for an I/O operation
    return n * n

# Single-threaded
start_time = time.time()
results = []
for i in range(10):
    results.append(task(i))
single_thread_time = time.time() - start_time

# Multithreaded
start_time = time.time()
with ThreadPoolExecutor(max_workers=5) as executor:
    results = list(executor.map(task, range(10)))
multi_thread_time = time.time() - start_time

print(f"Single-threaded time: {single_thread_time:.2f}s")
print(f"Multithreaded time: {multi_thread_time:.2f}s")
print(f"Speedup: {single_thread_time/multi_thread_time:.1f}x")
```
Best-practice summary
- Suitable workloads: multithreading fits I/O-bound tasks best (network requests, file operations, database queries)
- Thread count: a common heuristic for I/O-bound work is 2-5x the number of CPU cores, but tune it against real measurements
- Use a thread pool: ThreadPoolExecutor avoids the overhead of repeatedly creating and destroying threads
- Thread safety: protect shared resources with locks to avoid race conditions
- Exception handling: make sure exceptions raised inside threads are caught and handled
```python
# Best-practice example
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

def robust_task(url):
    try:
        response = requests.get(url, timeout=5)
        return response.status_code
    except Exception as e:
        return f"Error: {e}"

urls = [...]  # your list of URLs

with ThreadPoolExecutor(max_workers=5) as executor:
    # Submit all tasks
    future_to_url = {executor.submit(robust_task, url): url for url in urls}
    # Handle tasks as they complete
    for future in as_completed(future_to_url):
        url = future_to_url[future]
        try:
            result = future.result()
            print(f"{url}: {result}")
        except Exception as e:
            print(f"{url} generated exception: {e}")
```
Conclusion
Multithreading can significantly improve a program's efficiency, especially for I/O-bound tasks. With the examples and practices in this article, you should now have a solid grasp of Python's basic multithreading tools and how to use them well.