Python：函数的进阶特性：常用的高阶函数

什么是高阶函数？

高阶函数（Higher-order Function）是指能够接受函数作为参数或者将函数作为返回值的函数。在Python中，有几个高阶函数超级常用：内置的 map()、filter()，以及需要从 functools 模块导入的 reduce()。

1. map() 函数

基本概念

map() 函数将一个函数应用于一个或多个可迭代对象（如列表）的所有元素，并返回一个迭代器。传入多个可迭代对象时，函数每次从各个对象中各取一个元素作为参数，并在最短的可迭代对象耗尽时停止。

语法

map(function, iterable, ...)

基础示例

# 将列表中的每个数字平方
numbers = [1, 2, 3, 4, 5]

# 使用普通函数
def square(x):
    """Return ``x`` raised to the second power."""
    return pow(x, 2)

squared_numbers = map(square, numbers)
print(list(squared_numbers))  # 输出: [1, 4, 9, 16, 25]

# 使用lambda函数（更简洁）
squared_numbers = map(lambda x: x ** 2, numbers)
print(list(squared_numbers))  # 输出: [1, 4, 9, 16, 25]

多个可迭代对象的map

# 两个列表对应位置相加
list1 = [1, 2, 3]
list2 = [4, 5, 6]

result = map(lambda x, y: x + y, list1, list2)
print(list(result))  # 输出: [5, 7, 9]

# 三个列表处理
list3 = [10, 20, 30]
result = map(lambda x, y, z: x + y + z, list1, list2, list3)
print(list(result))  # 输出: [15, 27, 39]

实际应用

# 处理学生成绩：将百分制转换为等级
scores = [85, 92, 78, 60, 45, 95]

def score_to_grade(score):
    """Convert a numeric score into a letter grade.

    Cutoffs are inclusive lower bounds: 90 and above is "A", 80 and above is
    "B", 70 and above is "C", 60 and above is "D"; anything lower is "F".
    """
    # Ordered from highest to lowest so the first cutoff reached wins.
    cutoffs = ((90, "A"), (80, "B"), (70, "C"), (60, "D"))
    for floor, letter in cutoffs:
        if score >= floor:
            return letter
    return "F"

grades = map(score_to_grade, scores)
print(list(grades))  # 输出: ['B', 'A', 'C', 'D', 'F', 'A']

# 处理字符串：将名字转换为首字母大写
names = ["alice", "bob", "charlie", "diana"]
proper_names = map(lambda name: name.title(), names)
print(list(proper_names))  # 输出: ['Alice', 'Bob', 'Charlie', 'Diana']

2. filter() 函数

基本概念

filter() 函数用于过滤序列，过滤掉不符合条件的元素，返回由符合条件元素组成的迭代器。

语法

filter(function, iterable)

基础示例

# 过滤出偶数
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# 使用普通函数
def is_even(x):
    """Return True when ``x`` leaves no remainder after division by 2."""
    # A zero remainder is falsy, so negating it yields True for even numbers.
    return not x % 2

even_numbers = filter(is_even, numbers)
print(list(even_numbers))  # 输出: [2, 4, 6, 8, 10]

# 使用lambda函数
even_numbers = filter(lambda x: x % 2 == 0, numbers)
print(list(even_numbers))  # 输出: [2, 4, 6, 8, 10]

实际应用

# 过滤出及格的学生
students = [
    {"name": "小明", "score": 85},
    {"name": "小红", "score": 92},
    {"name": "小刚", "score": 58},
    {"name": "小丽", "score": 76},
    {"name": "小华", "score": 45}
]

passed_students = filter(lambda student: student["score"] >= 60, students)
print(list(passed_students))
# 输出: [{'name': '小明', 'score': 85}, {'name': '小红', 'score': 92}, {'name': '小丽', 'score': 76}]

# 过滤出非空字符串
words = ["hello", "", "world", " ", "python", "", "programming"]
non_empty = filter(lambda word: word.strip() != "", words)
print(list(non_empty))  # 输出: ['hello', 'world', 'python', 'programming']

# 过滤出有效的邮箱地址
emails = [
    "user@example.com",
    "invalid-email",
    "test@gmail.com",
    "another@domain.org",
    "not-an-email"
]

def is_valid_email(email):
    """Return True if ``email`` passes a lightweight structural check.

    This is a teaching-grade sanity check, not full RFC validation. An
    address is accepted only when it:

    * is longer than 5 characters,
    * contains exactly one ``@`` with a non-empty part before it, and
    * has a ``.`` in the part after the ``@`` that is neither the first nor
      the last character of that part.

    Args:
        email: The candidate address as a string.

    Returns:
        bool: True when all of the conditions above hold, otherwise False.
    """
    if len(email) <= 5 or email.count("@") != 1:
        return False

    local_part, _, domain = email.partition("@")
    if not local_part:
        return False

    # The dot must belong to the domain: a dot that only appears before the
    # "@" (e.g. "first.last@localhost") does not make the address valid.
    return "." in domain and not domain.startswith(".") and not domain.endswith(".")

valid_emails = filter(is_valid_email, emails)
print(list(valid_emails))
# 输出: ['user@example.com', 'test@gmail.com', 'another@domain.org']

3. reduce() 函数

基本概念

reduce() 函数会对参数序列中的元素进行累积操作。它需要从 functools 模块导入。

语法

reduce(function, iterable[, initializer])

基础示例

from functools import reduce

# 计算列表元素的乘积
numbers = [1, 2, 3, 4, 5]

# 使用普通函数
def multiply(x, y):
    """Return the product of ``x`` and ``y``.

    Takes exactly two arguments so it can serve as the combining function
    passed to ``reduce()``.
    """
    product = x * y
    return product

product = reduce(multiply, numbers)
print(product)  # 输出: 120

# 使用lambda函数
product = reduce(lambda x, y: x * y, numbers)
print(product)  # 输出: 120

reduce() 的执行过程

# 理解reduce的工作过程
numbers = [1, 2, 3, 4, 5]

# 手动模拟reduce的过程
# Sentinel meaning "no initializer supplied"; None cannot be used because
# None is itself a legitimate starting value.
_NO_INITIALIZER = object()


def manual_reduce(func, sequence, initializer=_NO_INITIALIZER):
    """Fold ``sequence`` into a single value, mirroring ``functools.reduce``.

    Args:
        func: Two-argument callable invoked as ``func(accumulated, item)``.
        sequence: Any iterable (list, tuple, generator, ...). It is consumed
            through an iterator, so it does not need to support indexing or
            slicing and is never copied.
        initializer: Optional starting value. When given, it is used as the
            first accumulated value and is returned unchanged if
            ``sequence`` is empty.

    Returns:
        The final accumulated value.

    Raises:
        TypeError: If ``sequence`` is empty and no ``initializer`` is given
            (the same error type ``functools.reduce`` raises).
    """
    iterator = iter(sequence)

    if initializer is _NO_INITIALIZER:
        # Without an initializer the first element seeds the accumulator.
        try:
            result = next(iterator)
        except StopIteration:
            raise TypeError(
                "manual_reduce() of empty iterable with no initial value"
            ) from None
    else:
        result = initializer

    for item in iterator:
        result = func(result, item)
    return result

product = manual_reduce(lambda x, y: x * y, numbers)
print(product)  # 输出: 120

实际应用

from functools import reduce

# 找出列表中的最大值
numbers = [3, 7, 2, 9, 1, 8]
max_number = reduce(lambda x, y: x if x > y else y, numbers)
print(f"最大值: {max_number}")  # 输出: 最大值: 9

# 拼接字符串列表
words = ["Hello", "World", "Python", "Programming"]
sentence = reduce(lambda x, y: x + " " + y, words)
print(sentence)  # 输出: Hello World Python Programming

# 计算阶乘
def factorial(n):
    """Return ``n!`` computed with ``reduce()``.

    Args:
        n: A non-negative integer.

    Returns:
        The product ``1 * 2 * ... * n``; ``factorial(0)`` is 1.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("factorial() not defined for negative values")
    # The initializer 1 is the empty product: without it, reduce() raises
    # TypeError for n == 0 because range(1, 1) is empty.
    return reduce(lambda x, y: x * y, range(1, n + 1), 1)

print(f"5! = {factorial(5)}")  # 输出: 5! = 120

# 使用initializer参数（初始值）
numbers = [1, 2, 3, 4, 5]
# 从10开始累加
sum_from_10 = reduce(lambda x, y: x + y, numbers, 10)
print(f"从10开始累加: {sum_from_10}")  # 输出: 从10开始累加: 25

4. sorted() 函数的高级用法

sorted() 可以通过 key 参数接收一个函数，因此按照上面的定义它同样属于高阶函数，并且常常与函数参数一起使用。

使用key参数

# 按字符串长度排序
words = ["apple", "banana", "cherry", "date", "elderberry"]
sorted_by_length = sorted(words, key=len)
print(sorted_by_length)  # 输出: ['date', 'apple', 'banana', 'cherry', 'elderberry']

# 按学生成绩排序
students = [
    {"name": "小明", "score": 85},
    {"name": "小红", "score": 92},
    {"name": "小刚", "score": 78},
    {"name": "小丽", "score": 95}
]

# 按成绩降序排列
sorted_students = sorted(students, key=lambda student: student["score"], reverse=True)
for student in sorted_students:
    print(f"{student['name']}: {student['score']}")
# 输出:
# 小丽: 95
# 小红: 92
# 小明: 85
# 小刚: 78

5. 高阶函数的组合使用

map + filter 组合

# Find the numbers greater than 50 and compute their squares.
numbers = [30, 45, 60, 75, 80, 25, 95]

# Filter first, then map: only the values that pass the test get squared.
result = map(lambda x: x ** 2, filter(lambda x: x > 50, numbers))
print(list(result))  # Output: [3600, 5625, 6400, 9025]

# Alternatively map first, then filter (less efficient: every element is
# squared before any is discarded). The 2500 threshold is 50 ** 2; comparing
# squares matches "x > 50" here only because every input is non-negative.
result = filter(lambda x: x > 2500, map(lambda x: x ** 2, numbers))
print(list(result))  # Output: [3600, 5625, 6400, 9025]

reduce + map 组合

from functools import reduce

# Compute the sum of the squares of all even numbers in the list.
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Approach 1: filter the even numbers, square them, then add them up.
# map() and filter() are lazy, so reduce() pulls the values one at a time.
result = reduce(
    lambda x, y: x + y,
    map(lambda x: x ** 2, filter(lambda x: x % 2 == 0, numbers))
)
print(f"偶数的平方和: {result}")  # Output: 偶数的平方和: 220

# Approach 2: a generator expression passed to sum() (the more Pythonic way).
# Note this is a generator expression, not a list comprehension: there are no
# square brackets, so no intermediate list is built.
result = sum(x ** 2 for x in numbers if x % 2 == 0)
print(f"偶数的平方和: {result}")  # Output: 偶数的平方和: 220

6. 实际项目案例

案例1：数据处理管道

from functools import reduce

# 原始数据：学生信息列表
students = [
    {"name": "张三", "math": 85, "english": 78, "chinese": 92},
    {"name": "李四", "math": 92, "english": 88, "chinese": 76},
    {"name": "王五", "math": 78, "english": 65, "chinese": 85},
    {"name": "赵六", "math": 45, "english": 52, "chinese": 60},
    {"name": "钱七", "math": 88, "english": 79, "chinese": 91}
]

# 数据处理管道
def process_student_data(students, subjects=("math", "english", "chinese"), pass_mark=60):
    """Filter, enrich, and rank student records.

    The pipeline keeps only the students whose score in every subject is at
    least ``pass_mark``, adds an ``"average"`` key (the mean of the subject
    scores, rounded to 2 decimals) to a copy of each remaining record, and
    returns those records ordered by average, highest first.

    Args:
        students: Iterable of dicts that hold a numeric score under each key
            listed in ``subjects``.
        subjects: Non-empty sequence of score keys to check and average.
            Defaults to the three subjects used by this example.
        pass_mark: Minimum score (inclusive) required in every subject.

    Returns:
        list: New dicts (the input records are not modified), sorted by
        ``"average"`` in descending order.
    """
    # 1. Filter: keep only students who pass every subject.
    passed_students = filter(
        lambda s: all(s[subject] >= pass_mark for subject in subjects),
        students,
    )

    # 2. Map: build a new record per student with the average score added.
    students_with_avg = map(
        lambda s: {
            **s,
            "average": round(sum(s[subject] for subject in subjects) / len(subjects), 2),
        },
        passed_students,
    )

    # 3. Sort: highest average first. sorted() already returns a new list,
    #    so no extra list() copy is needed.
    return sorted(students_with_avg, key=lambda s: s["average"], reverse=True)

# 使用
result = process_student_data(students)
for student in result:
    print(f"{student['name']}: 平均分 {student['average']}")

案例2：电商数据分析

from functools import reduce

# 模拟订单数据
orders = [
    {"order_id": 1, "customer": "Alice", "amount": 150.0, "items": 3},
    {"order_id": 2, "customer": "Bob", "amount": 75.5, "items": 2},
    {"order_id": 3, "customer": "Alice", "amount": 220.0, "items": 5},
    {"order_id": 4, "customer": "Charlie", "amount": 45.0, "items": 1},
    {"order_id": 5, "customer": "Bob", "amount": 180.0, "items": 4},
    {"order_id": 6, "customer": "Alice", "amount": 95.0, "items": 2}
]

# 分析任务1：计算总销售额
total_sales = reduce(lambda x, y: x + y["amount"], orders, 0)
print(f"总销售额: ${total_sales:.2f}")

# 分析任务2：找出大额订单（金额 > 100）
big_orders = list(filter(lambda order: order["amount"] > 100, orders))
print(f"大额订单数量: {len(big_orders)}")

# 分析任务3：按客户分组统计
def group_by_customer(orders):
    """Group order records by their ``"customer"`` value.

    Returns a dict that maps each customer to the list of that customer's
    orders, with both customers and orders kept in first-seen order.
    """
    grouped = {}
    for entry in orders:
        # setdefault creates the customer's list on first sight, then reuses it.
        grouped.setdefault(entry["customer"], []).append(entry)
    return grouped

customer_orders = group_by_customer(orders)

# Total spend per customer: fold each customer's order list into a running
# sum of the "amount" values, starting from 0.
customer_totals = {
    buyer: reduce(lambda running, purchase: running + purchase["amount"], purchases, 0)
    for buyer, purchases in customer_orders.items()
}

print("客户消费统计:")
for customer, total in customer_totals.items():
    print(f"  {customer}: ${total:.2f}")

7. 性能思考和替代方案

列表推导式 vs map/filter

numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# 使用map/filter
result1 = list(map(lambda x: x ** 2, filter(lambda x: x % 2 == 0, numbers)))

# 使用列表推导式（一般更推荐）
result2 = [x ** 2 for x in numbers if x % 2 == 0]

print(result1)  # 输出: [4, 16, 36, 64, 100]
print(result2)  # 输出: [4, 16, 36, 64, 100]
print(result1 == result2)  # 输出: True

生成器表达式

对于大数据集，使用生成器可以节省内存：

# 列表推导式：立即创建整个列表
squares_list = [x ** 2 for x in range(1000000)]  # 占用大量内存

# 生成器表达式：按需生成元素
squares_generator = (x ** 2 for x in range(1000000))  # 内存友善

# 在reduce中使用生成器
from functools import reduce
total = reduce(lambda x, y: x + y, (x ** 2 for x in range(1000) if x % 2 == 0))
print(total)