第 4 章 传统计算机视觉算法¶
📚 章节概述¶
本章介绍传统计算机视觉的核心算法,包括图像分割、目标识别、光流估计等。这些算法是深度学习时代之前的代表性方法,理解它们对于深入理解计算机视觉原理非常重要。
学习时间: 5-7 天 难度等级:⭐⭐⭐⭐ 前置知识:第 1-3 章
🎯 学习目标¶
完成本章后,你将能够: - 理解传统 CV 算法的原理和局限性 - 掌握图像分割的基本方法 - 了解传统目标识别技术 - 理解光流估计的原理 - 能够使用传统方法解决实际问题
4.1 图像分割¶
4.1.1 阈值分割¶
import cv2
import numpy as np
# Read the demo image (BGR); fail fast with a clear error if it is missing.
image = cv2.imread('image.jpg')
if image is None:
    raise FileNotFoundError('image.jpg 不存在或无法读取')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Global threshold: pixels above 127 become 255, the rest 0.
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Otsu threshold: automatically picks the cut that maximises between-class
# variance (works best when the grayscale histogram is roughly bimodal).
ret2, thresh2 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Adaptive threshold: each pixel is compared against the mean of its 11x11
# neighbourhood minus the constant 2 — robust to uneven illumination.
thresh3 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                cv2.THRESH_BINARY, 11, 2)
4.1.2 区域生长¶
def region_growing(image, seed, threshold=10):
    """Grow a connected region outward from *seed* in a grayscale image.

    A pixel joins the region when its intensity differs from the seed
    pixel's intensity by at most *threshold*; growth spreads through
    4-connected neighbours only (flood-fill style, explicit stack).

    Returns a uint8 mask of the same height/width: 255 inside the grown
    region, 0 elsewhere.
    """
    height, width = image.shape
    seen = np.zeros((height, width), dtype=bool)
    mask = np.zeros((height, width), dtype=np.uint8)
    reference = int(image[seed])  # intensity all candidates are compared to
    pending = [seed]
    neighbours = ((-1, 0), (1, 0), (0, -1), (0, 1))
    while pending:
        row, col = pending.pop()
        if seen[row, col]:
            continue
        seen[row, col] = True
        # Only matching pixels are added to the region, and only they
        # propagate the growth to their 4-neighbours.
        if abs(int(image[row, col]) - reference) <= threshold:
            mask[row, col] = 255
            for d_row, d_col in neighbours:
                r, c = row + d_row, col + d_col
                if 0 <= r < height and 0 <= c < width and not seen[r, c]:
                    pending.append((r, c))
    return mask
4.1.3 分水岭算法¶
# Watershed segmentation
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Inverse binary + Otsu so that foreground objects come out white.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Noise removal via morphological opening.
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# Sure-background region: dilation guarantees true background lies inside it.
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Sure-foreground region: pixels whose distance to the nearest boundary is
# at least 70% of the maximum distance.
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
# Unknown region = sure background minus sure foreground; watershed decides it.
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Label connected foreground components, shift by 1 so background is 1 (not 0),
# then mark the unknown region as 0 as cv2.watershed requires.
ret, markers = cv2.connectedComponents(sure_fg)
markers = markers + 1
markers[unknown == 255] = 0
# Run watershed; boundary pixels are labelled -1 and painted red (BGR order).
markers = cv2.watershed(image, markers)
image[markers == -1] = [0, 0, 255]
4.1.4 GrabCut 算法¶
# GrabCut interactive segmentation
mask = np.zeros(image.shape[:2], np.uint8)  # single-channel mask, height x width
# Scratch buffers grabCut uses for its internal GMM background/foreground models.
bgd_model = np.zeros((1, 65), np.float64)
fgd_model = np.zeros((1, 65), np.float64)
# ROI rectangle (x, y, width, height) assumed to contain the foreground.
rect = (50, 50, 450, 290)
# Run 5 iterations, initialised from the rectangle.
cv2.grabCut(image, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
# Collapse the 4-value mask: sure/probable background (0, 2) -> 0, foreground -> 1.
mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
result = image * mask2[:, :, np.newaxis]
4.2 传统目标识别¶
4.2.1 HOG + SVM¶
import cv2
from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
# 提取 HOG 特征
def extract_hog_features(image):
    """Return a flat HOG descriptor for a BGR image.

    The image is converted to grayscale first; the HOG parameters follow
    the classic Dalal-Triggs setup (9 orientation bins, 8x8-pixel cells,
    2x2-cell blocks) and the result comes back as a 1-D feature vector.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return hog(
        grayscale,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=False,
        feature_vector=True,
    )
# 训练 SVM 分类器
def train_svm(features, labels):
    """Fit a linear SVM on an 80/20 split of (features, labels) and return it.

    The split uses random_state=42 for reproducibility.
    NOTE(review): the 20% held-out portion is created but never evaluated
    here — callers wanting an accuracy score must score it themselves.
    """
    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split
    classifier = SVC(kernel='linear', C=1.0)
    classifier.fit(X_train, y_train)
    return classifier
# 滑动窗口检测
def sliding_window(image, step_size, window_size):
    """Yield (x, y, window) crops scanned left-to-right, top-to-bottom.

    window_size is (width, height); the scan advances step_size pixels per
    move and yields only windows that fit entirely inside the image.
    """
    width, height = window_size
    row_starts = range(0, image.shape[0] - height + 1, step_size)
    col_starts = range(0, image.shape[1] - width + 1, step_size)
    for top in row_starts:
        for left in col_starts:
            yield left, top, image[top:top + height, left:left + width]
4.2.2 Viola-Jones 人脸检测¶
# 加载预训练的人脸检测器
# Load the pretrained Haar-cascade detectors shipped with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
if face_cascade.empty() or eye_cascade.empty():
    raise RuntimeError('未能加载 Haar Cascade,请检查 OpenCV 数据文件是否完整')
# Detect faces: scale factor 1.3, at least 5 neighbouring detections per face.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
    # Blue box (BGR) around each detected face.
    cv2.rectangle(image, (x, y), (x+w, y+h), (255, 0, 0), 2)
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = image[y:y+h, x:x+w]
    # Search for eyes only inside the face region — cheaper and fewer false hits.
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        # Green box around each eye, drawn in face-ROI coordinates.
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)
4.3 光流估计¶
4.3.1 稀疏光流( Lucas-Kanade )¶
# Lucas-Kanade sparse optical flow
# NOTE(review): assumes prev_frame / curr_frame are consecutive BGR frames
# defined earlier (e.g. read from a video) — confirm in the calling context.
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
# Shi-Tomasi corners in the previous frame are the points to track.
prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=100, qualityLevel=0.3, minDistance=7)
if prev_pts is None:
    raise RuntimeError('上一帧没有检测到可跟踪角点')
# 15x15 search window, 2 pyramid levels, stop after 10 iterations or eps 0.03.
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
next_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None, **lk_params)
if next_pts is None or status is None:
    raise RuntimeError('光流计算失败')
# Keep only points that were tracked successfully (status == 1).
good_prev = prev_pts[status.flatten() == 1]
good_next = next_pts[status.flatten() == 1]
# Draw each motion vector as a green line on the current frame.
for prev, nxt in zip(good_prev, good_next):
    prev = prev.ravel()
    nxt = nxt.ravel()
    cv2.line(curr_frame, (int(prev[0]), int(prev[1])), (int(nxt[0]), int(nxt[1])), (0, 255, 0), 2)
4.3.2 稠密光流( Farneback )¶
# Farneback dense optical flow: one (dx, dy) vector per pixel.
flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
# Visualise the flow field in HSV: hue encodes direction, value encodes magnitude.
hsv = np.zeros_like(prev_frame)
hsv[..., 1] = 255  # full saturation everywhere
mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
hsv[..., 0] = ang * 180 / np.pi / 2  # radians -> OpenCV hue range [0, 180)
hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
4.4 练习题¶
基础题¶
- 简答题:
- 图像分割有哪些方法?
主要方法:①阈值分割(全局阈值、自适应阈值、 Otsu );②区域生长;③分水岭算法;④GrabCut (基于图割);⑤K-means 聚类分割;⑥基于图的方法( Graph Cut )。
- Otsu 阈值法的原理是什么?
Otsu 法自动寻找使类间方差最大的阈值。它默认前景/背景在灰度直方图上具有相对可分的统计结构;在该准则下,类间方差最大的阈值是“最优”的,但当光照不均、类别分布严重重叠或目标不是双峰结构时,效果可能明显下降。
进阶题¶
- 编程题:
- 实现 K-means 图像分割。
- 使用 HOG+SVM 实现简单目标检测。
4.5 关键复盘问题¶
高频复盘题¶
Q1: 传统图像分割方法有哪些?
参考答案: - 阈值分割(全局、自适应、 Otsu ) - 区域生长 - 分水岭算法 - GrabCut - K-means 聚类 - 基于图的方法( Graph Cut )
Q2: HOG 特征的原理是什么?
参考答案: - 计算图像梯度 - 将图像划分为小块 - 统计每个块的梯度方向直方图 - 形成特征向量 - 对光照和几何变化鲁棒
4.6 本章小结¶
核心知识点¶
- 图像分割:阈值、区域生长、分水岭、 GrabCut
- 目标识别: HOG+SVM 、 Viola-Jones
- 光流估计: Lucas-Kanade 、 Farneback
下一步¶
下一章:05-卷积神经网络基础.md - 学习 CNN 基础
恭喜完成第 4 章! 🎉
⚠️ 核验说明(2026-04-03):本页已完成逐段人工复核,并为 Otsu 与 Haar Cascade 示例补充了适用前提和加载检查。若文中涉及外部模型、API、版本号、价格、部署依赖或第三方产品名称,请以官方文档、论文原文和实际运行环境为准。
最后更新日期: 2026-04-03