Python自动化脚本踩坑记录：过来人的经验之谈

weixin_44118318

16人浏览 · 2026-05-02 08:05:06

weixin_44118318 · 2026-05-02 08:05:06 发布

写了几年代码，踩过无数坑。今天把自动化脚本开发中常见的"坑"整理出来，都是血的教训。这些坑新人很容易踩，提前知道能少走很多弯路。

一、文件操作坑

坑1：没关文件导致锁文件

# ❌ 危险：文件未关闭，Windows下可能导致文件被锁
def write_log(message):
    """Anti-pattern demo: append ``message`` to log.txt without ever closing the handle."""
    f = open('log.txt', 'a', encoding='utf-8')
    f.write(message)
    # Not closed! There is no close() call here, so an exception in write() leaves the handle open too

# ✓ 正确：使用with自动关闭
def write_log(message):
    """Append ``message`` verbatim to ``log.txt`` (UTF-8) in the working directory.

    The handle is owned by the ``with`` block, so it is closed on exit even
    when the write raises.
    """
    with open('log.txt', mode='a', encoding='utf-8') as log_file:
        log_file.write(message)

坑2：读写前不检查文件存在

# ❌ 危险：文件不存在会抛异常
with open('config.json') as f:
    config = json.load(f)

# ✓ 正确：先检查
def read_json_safe(path):
    """Return the JSON document stored at ``path``, or ``{}`` if the path does not exist.

    Malformed JSON is not handled here and propagates as ``json.JSONDecodeError``.
    """
    if os.path.exists(path):
        with open(path, mode='r', encoding='utf-8') as source:
            return json.load(source)
    return {}

# ✓ 更好：使用异常处理
def read_json_safe(path):
    """Load JSON from ``path``; fall back to ``{}`` when the file is missing or not valid JSON.

    Any other failure (permissions, undecodable bytes, ...) still propagates.
    """
    try:
        with open(path, mode='r', encoding='utf-8') as source:
            raw_text = source.read()
        parsed = json.loads(raw_text)
    except (json.JSONDecodeError, FileNotFoundError):
        parsed = {}
    return parsed

坑3：写入不创建父目录

# ❌ 危险：目录不存在会报错
with open('data/output/result.txt', 'w') as f:
    f.write('result')

# ✓ 正确：确保目录存在
os.makedirs('data/output', exist_ok=True)
with open('data/output/result.txt', 'w') as f:
    f.write('result')

# ✓ 更好：用pathlib
from pathlib import Path
output_path = Path('data/output/result.txt')
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text('result', encoding='utf-8')

二、日期时间坑

坑4：时区问题

# ❌ 危险：忽略时区，服务器在不同时区会出问题
from datetime import datetime
now = datetime.now()  # 本地时间，没有时区信息

# ✓ 正确：使用UTC或明确时区
from datetime import datetime, timezone
utc_now = datetime.now(timezone.utc)  # UTC时间
print(utc_now)  # 2024-01-20 12:00:00+00:00

# 保存到文件用ISO格式
with open('log.txt', 'a') as f:
    f.write(utc_now.isoformat() + '\n')

# ✓ 处理带时区的时间
from datetime import timedelta
cst = timezone(timedelta(hours=8))
cst_now = datetime.now(cst)

坑5：日期格式不一致

# ❌ Danger: date formats differ between environments
# NOTE(review): the original note claims Windows may emit Chinese here, but %Y, %m and %d
# are numeric directives; the locale-dependent ones are %x, %c, %A, %B, %p — confirm.
timestamp = datetime.now().strftime('%Y/%m/%d')  # ad-hoc slash-separated layout

# ✓ 正确：使用ISO格式或明确格式
timestamp = datetime.now().isoformat()  # 2024-01-20T12:00:00.000000
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')  # 明确分隔符

# ✓ 跨平台安全：月份和日期补零
timestamp = datetime.now().strftime('%Y-%m-%d')  # 总是YYYY-MM-DD

三、网络请求坑

坑6：不处理超时

# ❌ 危险：请求永不返回（网络问题）
response = requests.get(url)

# ✓ 正确：设置超时
response = requests.get(url, timeout=30)

# ✓ 更安全：区分连接超时和读取超时
from requests.exceptions import ReadTimeout, ConnectTimeout
try:
    response = requests.get(url, timeout=(5, 30))  # (连接超时, 读取超时)
except (ConnectTimeout, ReadTimeout) as e:
    print(f"请求超时: {e}")

坑7：SSL证书验证失败

# ❌ 危险：忽略SSL验证（生产环境不安全）
response = requests.get(url, verify=False)

# ✓ 正确：更新证书或使用正确的CA
# 方案1：更新certifi的证书
import certifi
response = requests.get(url, verify=certifi.where())

# 方案2：如果是自签名证书，单独处理
if url.startswith('https://internal.corp'):
    response = requests.get(url, verify='/path/to/corp-ca.crt')
else:
    response = requests.get(url)

坑8：不处理编码

# ❌ 危险：默认编码可能不对
content = response.text

# ✓ 正确：手动指定编码
content = response.content.decode('utf-8')

# ✓ Or let requests guess the charset from the body (apparent_encoding).
# Mis-decoded text is still non-empty, so a fallback guarded by `if not content`
# would never run for it; the encoding has to be chosen BEFORE reading .text.
# apparent_encoding can be None when nothing could be detected, hence the guard.
if response.apparent_encoding:
    response.encoding = response.apparent_encoding
content = response.text

四、并发坑

坑9：线程不安全

import threading

# ❌ 危险：共享变量被多线程修改
counter = 0
def increment():
    global counter
    for _ in range(100000):
        counter += 1  # 非原子操作，结果不可预测

threads = [threading.Thread(target=increment) for _ in range(10)]
for t in threads: t.start()
for t in threads: t.join()
print(counter)  # 结果不确定，通常小于1000000

# ✓ 正确：使用锁
from threading import Lock

counter = 0
lock = Lock()


def increment():
    """Add 100000 to the module-level ``counter``, one locked step at a time.

    Each read-modify-write of ``counter`` happens while holding ``lock``, so
    concurrent callers cannot interleave inside a single update.
    """
    global counter
    iterations = 100000
    for _step in range(iterations):
        with lock:
            counter = counter + 1

# ✓ Or cut lock traffic: tally privately in each thread, merge once under a lock.
# collections.Counter is a dict subclass with no locking of its own, so doing
# `counter['count'] += 1` from several threads is the same read-modify-write
# race as the bare integer version.
from collections import Counter
from threading import Lock

counter = Counter()
counter_lock = Lock()


def increment():
    """Add 100000 to ``counter['count']`` without losing updates across threads.

    The tally is accumulated in a thread-private local and merged into the
    shared ``counter`` in one step while holding ``counter_lock``.
    """
    local_total = 0
    for _ in range(100000):
        local_total += 1
    # Single critical section per call instead of one per iteration.
    with counter_lock:
        counter['count'] += local_total

坑10：进程池内存泄漏（准确地说是大对象被重复序列化、复制，内存成倍增长）

# ❌ 危险：大对象传给子进程导致内存翻倍
def process_large_data(data):
    return heavy_processing(data)

large_data = load_huge_file()  # 占用1GB内存
pool = multiprocessing.Pool(4)
results = pool.map(process_large_data, [large_data] * 10)  # 每个进程复制1GB

# ✓ 正确：使用共享内存或分片处理
def process_chunk(chunk):
    return heavy_processing(chunk)

large_data = load_huge_file()
chunks = split_into_chunks(large_data, 10)  # 分成10份
results = pool.map(process_chunk, chunks)

五、字符串坑

坑11：字符串拼接性能

import time

# ❌ 慢：循环中频繁拼接字符串
result = ""
for i in range(10000):
    result += str(i) + ","

# ✓ 快：用join
parts = [str(i) for i in range(10000)]
result = ",".join(parts)

# ✓ Alternative: io.StringIO as an in-memory text buffer
# NOTE(review): the original label calls this "faster" than join; nothing here
# establishes that — measure with timeit before relying on it.
from io import StringIO
buf = StringIO()
for i in range(10000):
    buf.write(str(i))
    buf.write(",")  # written after every item, so the result ends with a trailing comma (unlike ",".join)
result = buf.getvalue()

坑12：JSON序列化大数据

import json

# ❌ 慢：大数据量JSON序列化很慢
data = {'items': list_of_million_items}
json_str = json.dumps(data)

# ✓ 快：用ujson或orjson
import orjson
json_bytes = orjson.dumps(data)  # 返回bytes
json_str = json_bytes.decode()

# ✓ 更快：分块写入
def write_large_json(data, path, chunk_size=10000):
    """Stream ``data['items']`` to ``path`` as ``{"items":[...]}``, one slice at a time.

    Only the ``'items'`` entry is written; any other keys of ``data`` are ignored.

    Args:
        data: Mapping whose ``'items'`` value is a sliceable sequence with a length.
        path: Destination file; opened in binary mode and overwritten.
        chunk_size: Number of items serialized per write. Must be at least 1.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        KeyError: If ``data`` has no ``'items'`` key.
    """
    if chunk_size < 1:
        # range() rejects a zero step and silently yields nothing for a negative one.
        raise ValueError("chunk_size must be >= 1")
    # Look the items up before opening so a bad ``data`` does not truncate ``path``.
    items = data['items']
    with open(path, 'wb') as f:
        f.write(b'{"items":[')
        for i in range(0, len(items), chunk_size):
            # The file is binary, so every write must be bytes: keep orjson's
            # bytes output as-is and drop the enclosing b'[' and b']'.
            chunk_json = orjson.dumps(items[i:i + chunk_size])[1:-1]
            if i > 0:
                f.write(b',')
            f.write(chunk_json)
        f.write(b']}')

六、路径坑

坑13：相对路径基准不对

# ❌ 危险：相对路径基于当前工作目录，不是脚本目录
with open('config.json') as f:  # 基于当前运行目录
    config = json.load(f)

# ✓ 正确：基于脚本所在目录
from pathlib import Path
SCRIPT_DIR = Path(__file__).parent.resolve()
config_path = SCRIPT_DIR / 'config.json'
with open(config_path) as f:
    config = json.load(f)

坑14：Windows路径斜杠问题

# ❌ 危险：硬编码斜杠
path = "data\\output\\file.txt"  # 只在Windows有效

# ✓ 正确：用pathlib或os.path
from pathlib import Path
path = Path("data") / "output" / "file.txt"  # 自动处理跨平台

# 或
import os
path = os.path.join("data", "output", "file.txt")

七、环境坑

坑15：pip安装不指定版本

# ❌ 危险：依赖更新后代码可能不兼容
# requirements.txt
requests
pandas

# ✓ 正确：锁定版本
# requirements.txt
requests==2.28.2
pandas==1.5.3
openpyxl==3.1.2

# 定期更新并测试
# pip list --outdated  # 查看可更新的包

坑16：不处理环境变量不存在

import os

# ❌ Danger: indexing os.environ raises KeyError when the variable is not set
api_key = os.environ['API_KEY']  # KeyError!

# ✓ 正确：提供默认值
api_key = os.environ.get('API_KEY', 'default_key')

# ✓ 或明确检查
api_key = os.environ.get('API_KEY')
if not api_key:
    raise ValueError("API_KEY environment variable is required")

总结

类型	常见坑	解决方案
文件	不关文件、不检查存在、不创建目录	with语句、检查存在、makedirs
时间	时区问题、格式不一致	使用UTC、ISO格式
网络	无超时、不处理证书、不处理编码	timeout参数、正确配置CA证书、明确编码
并发	线程不安全、内存泄漏	使用锁、分片处理
字符串	循环拼接、JSON性能	join、orjson
路径	相对路径基准、斜杠问题	pathlib、绝对路径
环境	不锁版本、不处理环境变量	锁定版本、get带默认值或显式检查后报错

记住这些坑，代码写的时候多留个心眼，能避免大部分问题。遇到问题不要慌，冷静分析，问题总能解决。

亚马逊云科技技术品牌专区

更多推荐

云计算资源分享与下载

视频和相关社区网站的介绍，但是由于我比较热衷于微软的Azure平台的研究和推广，针对其他厂商的云计算产品，我只是很粗浅的了解了其概念及简单使用，并没有做深入的研究，所以如果后续发布的云计算相关文章也会集中在Azure上。

亚马逊云科技技术品牌专区

虚拟化技术深度解析：从底层原理到产业实践，读懂云计算的核心基石

文章摘要虚拟化技术作为云计算的核心基础，经历了从大型机到云原生的演进历程。本文系统梳理了虚拟化的技术脉络：从解决物理机资源浪费的初衷出发，阐述了Hypervisor的资源隔离原理，对比了TypeI和TypeII两种架构特性，并深入分析了KVM、Docker等典型技术方案。虚拟化的本质是对计算资源的抽象，通过屏蔽硬件细节实现灵活调度。文章最后以OpenStack私有云搭建为例展示了虚拟化实践，并指

亚马逊云科技技术品牌专区

国内汽车后市场AI营销创新品牌的服务模式解析

这是一种利用人工智能技术分析用户行为和数据，为汽车维修、保养及检测等服务商提供精准获客与运营支持的数字化解决方案。与传统依赖经验判断或广撒网式的营销不同，AI营销通过整合多平台数据，能够更敏锐地识别潜在车主的需求节点（如年检周期提醒、保养里程预测）。它不仅能自动化生成适配的内容素材、优化投放策略，还能有效连接线上流量与线下门店服务，致力于实现从线索获取到最终成交的全链路闭环管理。