243 lines
8.6 KiB
Python
243 lines
8.6 KiB
Python
import os
|
||
import cv2
|
||
import numpy as np
|
||
from typing import List, Tuple, Union, Optional
|
||
from PIL import Image
|
||
|
||
ArrayLikeImage = Union[np.ndarray, str, Image.Image]
|
||
|
||
class OCRUtils:
|
||
@classmethod
|
||
def _to_gray(cls, img: ArrayLikeImage) -> np.ndarray:
|
||
"""
|
||
接受路径/np.ndarray/PIL.Image,统一转为灰度 np.ndarray。
|
||
"""
|
||
# 路径
|
||
if isinstance(img, str):
|
||
arr = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
|
||
if arr is None:
|
||
raise FileNotFoundError(f"图像加载失败,请检查路径: {img}")
|
||
return arr
|
||
|
||
# PIL.Image
|
||
if isinstance(img, Image.Image):
|
||
return cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2GRAY)
|
||
|
||
# numpy 数组
|
||
if isinstance(img, np.ndarray):
|
||
if img.ndim == 2:
|
||
return img # 已是灰度
|
||
if img.ndim == 3:
|
||
return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||
raise ValueError("不支持的图像维度(期望 2D 灰度或 3D BGR/RGB)")
|
||
|
||
raise TypeError("large_image 类型必须是 str / np.ndarray / PIL.Image.Image")
|
||
|
||
@classmethod
|
||
def non_max_suppression(
|
||
cls,
|
||
boxes: List[List[float]],
|
||
scores: Optional[np.ndarray] = None,
|
||
overlapThresh: float = 0.5
|
||
) -> np.ndarray:
|
||
"""
|
||
boxes: [ [x1,y1,x2,y2], ... ]
|
||
scores: 每个框的置信度(用于“按分数做 NMS”)。若为 None,则退化为按 y2 排序的经典近似。
|
||
返回: 经过 NMS 保留的 boxes(int) ndarray,形状 (N,4)
|
||
"""
|
||
if len(boxes) == 0:
|
||
return np.empty((0, 4), dtype=int)
|
||
|
||
boxes = np.asarray(boxes, dtype=np.float32)
|
||
x1, y1, x2, y2 = boxes.T
|
||
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
||
|
||
if scores is None:
|
||
order = np.argsort(y2) # 经典写法
|
||
else:
|
||
scores = np.asarray(scores, dtype=np.float32)
|
||
order = np.argsort(scores)[::-1] # 分数从高到低
|
||
|
||
keep = []
|
||
while order.size > 0:
|
||
i = order[0] if scores is not None else order[-1]
|
||
keep.append(i)
|
||
|
||
rest = order[1:] if scores is not None else order[:-1]
|
||
|
||
xx1 = np.maximum(x1[i], x1[rest])
|
||
yy1 = np.maximum(y1[i], y1[rest])
|
||
xx2 = np.minimum(x2[i], x2[rest])
|
||
yy2 = np.minimum(y2[i], y2[rest])
|
||
|
||
w = np.maximum(0, xx2 - xx1 + 1)
|
||
h = np.maximum(0, yy2 - yy1 + 1)
|
||
inter = w * h
|
||
ovr = inter / areas[rest]
|
||
|
||
inds = np.where(ovr <= overlapThresh)[0]
|
||
order = rest[inds]
|
||
|
||
return boxes[keep].astype(int)
|
||
|
||
# @classmethod
|
||
# def find_template(
|
||
# cls,
|
||
# template_path: str,
|
||
# large_image: ArrayLikeImage,
|
||
# threshold: float = 0.8,
|
||
# overlapThresh: float = 0.5,
|
||
# return_boxes: bool = False
|
||
# ) -> Union[List[Tuple[int, int]], Tuple[List[Tuple[int, int]], np.ndarray]]:
|
||
# """
|
||
# 在 large_image 中查找 template_path 模板的位置。
|
||
# - large_image 可为文件路径、np.ndarray 或 PIL.Image
|
||
# - threshold: 模板匹配阈值(TM_CCOEFF_NORMED)
|
||
# - overlapThresh: NMS 重叠阈值
|
||
# - return_boxes: True 时同时返回保留的框数组 (N,4)
|
||
#
|
||
# 返回:
|
||
# centers 或 (centers, boxes)
|
||
# centers: [(cx, cy), ...]
|
||
# boxes: [[x1,y1,x2,y2], ...] (np.ndarray, int)
|
||
# """
|
||
# # 模板(灰度)
|
||
# template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
|
||
# if template is None:
|
||
# raise FileNotFoundError(f"模板图像加载失败,请检查路径: {template_path}")
|
||
#
|
||
# # 大图(灰度)
|
||
# gray = cls._to_gray(large_image)
|
||
#
|
||
# # 模板尺寸
|
||
# tw, th = template.shape[::-1]
|
||
#
|
||
# # 模板匹配(相关系数归一化)
|
||
# result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
|
||
#
|
||
# # 阈值筛选
|
||
# ys, xs = np.where(result >= threshold)
|
||
# if len(xs) == 0:
|
||
# return ([], np.empty((0, 4), dtype=int)) if return_boxes else []
|
||
#
|
||
# # 收集候选框与分数
|
||
# boxes = []
|
||
# scores = []
|
||
# for (x, y) in zip(xs, ys):
|
||
# boxes.append([x, y, x + tw, y + th])
|
||
# scores.append(result[y, x])
|
||
#
|
||
# # 按分数做 NMS
|
||
# boxes_nms = cls.non_max_suppression(boxes, scores=np.array(scores), overlapThresh=overlapThresh)
|
||
#
|
||
# # 计算中心点
|
||
# centers = [((x1 + x2) // 2, (y1 + y2) // 2) for (x1, y1, x2, y2) in boxes_nms]
|
||
#
|
||
#
|
||
#
|
||
# if return_boxes:
|
||
# return centers, boxes_nms
|
||
#
|
||
#
|
||
# return centers
|
||
|
||
@classmethod
|
||
def find_template(
|
||
cls,
|
||
template_path: str,
|
||
large_image: ArrayLikeImage,
|
||
threshold: float = 0.8,
|
||
overlapThresh: float = 0.5,
|
||
return_boxes: bool = False
|
||
) -> Union[List[Tuple[int, int]], Tuple[List[Tuple[int, int]], np.ndarray]]:
|
||
"""
|
||
在 large_image 中查找 template_path 模板的位置。
|
||
- large_image 可为文件路径、np.ndarray 或 PIL.Image
|
||
- threshold: 模板匹配阈值(TM_CCOEFF_NORMED)
|
||
- overlapThresh: NMS 重叠阈值
|
||
- return_boxes: True 时同时返回保留的框数组 (N,4)
|
||
|
||
若检测结果为空,则在相同阈值下最多重试三次(共 3 次尝试)。
|
||
返回:
|
||
centers 或 (centers, boxes)
|
||
centers: [(cx, cy), ...]
|
||
boxes: [[x1,y1,x2,y2], ...] (np.ndarray, int)
|
||
"""
|
||
|
||
if not os.path.isfile(template_path):
|
||
print(f"模板文件不存在 → {template_path}")
|
||
raise FileNotFoundError(f"模板文件不存在 → {template_path}")
|
||
|
||
size = os.path.getsize(template_path)
|
||
if size == 0:
|
||
print(f"模板文件大小为 0 → {template_path} ")
|
||
raise ValueError(f"模板文件大小为 0 → {template_path}")
|
||
# 模板(灰度)
|
||
template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
|
||
if template is None:
|
||
raise FileNotFoundError(f"模板图像加载失败,请检查路径: {template_path}")
|
||
|
||
# 大图(灰度)
|
||
gray = cls._to_gray(large_image)
|
||
|
||
# 模板尺寸
|
||
tw, th = template.shape[::-1]
|
||
|
||
# 内部:执行一次匹配并返回 (centers, boxes_nms)
|
||
def _match_once(cur_threshold: float):
|
||
# 模板匹配(相关系数归一化)
|
||
result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
|
||
|
||
# 阈值筛选
|
||
ys, xs = np.where(result >= cur_threshold)
|
||
if len(xs) == 0:
|
||
return [], np.empty((0, 4), dtype=int)
|
||
|
||
# 收集候选框与分数
|
||
boxes = []
|
||
scores = []
|
||
for (x, y) in zip(xs, ys):
|
||
boxes.append([int(x), int(y), int(x + tw), int(y + th)])
|
||
scores.append(float(result[y, x]))
|
||
|
||
# 按分数做 NMS
|
||
boxes_nms = cls.non_max_suppression(
|
||
boxes,
|
||
scores=np.asarray(scores, dtype=np.float32),
|
||
overlapThresh=overlapThresh
|
||
)
|
||
|
||
# 计算中心点(转为 Python int)
|
||
centers = [(int((x1 + x2) // 2), int((y1 + y2) // 2))
|
||
for (x1, y1, x2, y2) in boxes_nms]
|
||
|
||
# 统一为 np.ndarray[int]
|
||
boxes_nms = np.asarray(boxes_nms, dtype=int)
|
||
return centers, boxes_nms
|
||
|
||
# ===== 重试控制(最多 3 次)=====
|
||
MAX_RETRIES = 3
|
||
THRESHOLD_DECAY = 0.0 # 如需越试越宽松,可改为 0.02~0.05;不需要则保持 0
|
||
MIN_THRESHOLD = 0.6
|
||
|
||
cur_threshold = float(threshold)
|
||
last_centers, last_boxes = [], np.empty((0, 4), dtype=int)
|
||
|
||
for attempt in range(MAX_RETRIES):
|
||
centers, boxes_nms = _match_once(cur_threshold)
|
||
if centers:
|
||
if return_boxes:
|
||
return centers, boxes_nms
|
||
return centers
|
||
|
||
# 记录最后一次(若最终失败按规范返回空)
|
||
last_centers, last_boxes = centers, boxes_nms
|
||
|
||
# 为下一次尝试准备(这里默认不衰减阈值;如需可打开 THRESHOLD_DECAY)
|
||
if attempt < MAX_RETRIES - 1 and THRESHOLD_DECAY > 0.0:
|
||
cur_threshold = max(MIN_THRESHOLD, cur_threshold - THRESHOLD_DECAY)
|
||
|
||
# 全部尝试失败
|
||
if return_boxes:
|
||
return last_centers, last_boxes
|
||
return last_centers |