# iOSAI/Utils/OCRUtils.py

import os
import cv2
import numpy as np
from typing import List, Tuple, Union, Optional
from PIL import Image

ArrayLikeImage = Union[np.ndarray, str, Image.Image]

class OCRUtils:
    @classmethod
    def _to_gray(cls, img: ArrayLikeImage) -> np.ndarray:
        """
        接受路径/np.ndarray/PIL.Image，统一转为灰度 np.ndarray。
        """
        # 路径
        if isinstance(img, str):
            arr = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
            if arr is None:
                raise FileNotFoundError(f"图像加载失败，请检查路径: {img}")
            return arr

        # PIL.Image
        if isinstance(img, Image.Image):
            return cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2GRAY)

        # numpy 数组
        if isinstance(img, np.ndarray):
            if img.ndim == 2:
                return img  # 已是灰度
            if img.ndim == 3:
                return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            raise ValueError("不支持的图像维度（期望 2D 灰度或 3D BGR/RGB）")

        raise TypeError("large_image 类型必须是 str / np.ndarray / PIL.Image.Image")

    @classmethod
    def non_max_suppression(
        cls,
        boxes: List[List[float]],
        scores: Optional[np.ndarray] = None,
        overlapThresh: float = 0.5
    ) -> np.ndarray:
        """
        boxes: [ [x1,y1,x2,y2], ... ]
        scores: 每个框的置信度（用于“按分数做 NMS”）。若为 None，则退化为按 y2 排序的经典近似。
        返回: 经过 NMS 保留的 boxes(int) ndarray，形状 (N,4)
        """
        if len(boxes) == 0:
            return np.empty((0, 4), dtype=int)

        boxes = np.asarray(boxes, dtype=np.float32)
        x1, y1, x2, y2 = boxes.T
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)

        if scores is None:
            order = np.argsort(y2)  # 经典写法
        else:
            scores = np.asarray(scores, dtype=np.float32)
            order = np.argsort(scores)[::-1]  # 分数从高到低

        keep = []
        while order.size > 0:
            i = order[0] if scores is not None else order[-1]
            keep.append(i)

            rest = order[1:] if scores is not None else order[:-1]

            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])

            w = np.maximum(0, xx2 - xx1 + 1)
            h = np.maximum(0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / areas[rest]

            inds = np.where(ovr <= overlapThresh)[0]
            order = rest[inds]

        return boxes[keep].astype(int)

    # @classmethod
    # def find_template(
    #     cls,
    #     template_path: str,
    #     large_image: ArrayLikeImage,
    #     threshold: float = 0.8,
    #     overlapThresh: float = 0.5,
    #     return_boxes: bool = False
    # ) -> Union[List[Tuple[int, int]], Tuple[List[Tuple[int, int]], np.ndarray]]:
    #     """
    #     在 large_image 中查找 template_path 模板的位置。
    #     - large_image 可为文件路径、np.ndarray 或 PIL.Image
    #     - threshold: 模板匹配阈值（TM_CCOEFF_NORMED）
    #     - overlapThresh: NMS 重叠阈值
    #     - return_boxes: True 时同时返回保留的框数组 (N,4)
    #
    #     返回:
    #       centers 或 (centers, boxes)
    #       centers: [(cx, cy), ...]
    #       boxes:   [[x1,y1,x2,y2], ...] (np.ndarray, int)
    #     """
    #     # 模板（灰度）
    #     template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
    #     if template is None:
    #         raise FileNotFoundError(f"模板图像加载失败，请检查路径: {template_path}")
    #
    #     # 大图（灰度）
    #     gray = cls._to_gray(large_image)
    #
    #     # 模板尺寸
    #     tw, th = template.shape[::-1]
    #
    #     # 模板匹配（相关系数归一化）
    #     result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)
    #
    #     # 阈值筛选
    #     ys, xs = np.where(result >= threshold)
    #     if len(xs) == 0:
    #         return ([], np.empty((0, 4), dtype=int)) if return_boxes else []
    #
    #     # 收集候选框与分数
    #     boxes = []
    #     scores = []
    #     for (x, y) in zip(xs, ys):
    #         boxes.append([x, y, x + tw, y + th])
    #         scores.append(result[y, x])
    #
    #     # 按分数做 NMS
    #     boxes_nms = cls.non_max_suppression(boxes, scores=np.array(scores), overlapThresh=overlapThresh)
    #
    #     # 计算中心点
    #     centers = [((x1 + x2) // 2, (y1 + y2) // 2) for (x1, y1, x2, y2) in boxes_nms]
    #
    #
    #
    #     if return_boxes:
    #         return centers, boxes_nms
    #
    #
    #     return centers

    @classmethod
    def find_template(
            cls,
            template_path: str,
            large_image: ArrayLikeImage,
            threshold: float = 0.8,
            overlapThresh: float = 0.5,
            return_boxes: bool = False
    ) -> Union[List[Tuple[int, int]], Tuple[List[Tuple[int, int]], np.ndarray]]:
        """
        在 large_image 中查找 template_path 模板的位置。
        - large_image 可为文件路径、np.ndarray 或 PIL.Image
        - threshold: 模板匹配阈值（TM_CCOEFF_NORMED）
        - overlapThresh: NMS 重叠阈值
        - return_boxes: True 时同时返回保留的框数组 (N,4)

        若检测结果为空，则在相同阈值下最多重试三次（共 3 次尝试）。
        返回:
          centers 或 (centers, boxes)
          centers: [(cx, cy), ...]
          boxes:   [[x1,y1,x2,y2], ...] (np.ndarray, int)
        """

        if not os.path.isfile(template_path):
            print(f"模板文件不存在 → {template_path}")
            raise FileNotFoundError(f"模板文件不存在 → {template_path}")

        size = os.path.getsize(template_path)
        if size == 0:
            print(f"模板文件大小为 0 → {template_path} ")
            raise ValueError(f"模板文件大小为 0 → {template_path}")
        # 模板（灰度）
        template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
        if template is None:
            raise FileNotFoundError(f"模板图像加载失败，请检查路径: {template_path}")

        # 大图（灰度）
        gray = cls._to_gray(large_image)

        # 模板尺寸
        tw, th = template.shape[::-1]

        # 内部：执行一次匹配并返回 (centers, boxes_nms)
        def _match_once(cur_threshold: float):
            # 模板匹配（相关系数归一化）
            result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)

            # 阈值筛选
            ys, xs = np.where(result >= cur_threshold)
            if len(xs) == 0:
                return [], np.empty((0, 4), dtype=int)

            # 收集候选框与分数
            boxes = []
            scores = []
            for (x, y) in zip(xs, ys):
                boxes.append([int(x), int(y), int(x + tw), int(y + th)])
                scores.append(float(result[y, x]))

            # 按分数做 NMS
            boxes_nms = cls.non_max_suppression(
                boxes,
                scores=np.asarray(scores, dtype=np.float32),
                overlapThresh=overlapThresh
            )

            # 计算中心点（转为 Python int）
            centers = [(int((x1 + x2) // 2), int((y1 + y2) // 2))
                       for (x1, y1, x2, y2) in boxes_nms]

            # 统一为 np.ndarray[int]
            boxes_nms = np.asarray(boxes_nms, dtype=int)
            return centers, boxes_nms

        # ===== 重试控制（最多 3 次）=====
        MAX_RETRIES = 3
        THRESHOLD_DECAY = 0.0  # 如需越试越宽松，可改为 0.02~0.05；不需要则保持 0
        MIN_THRESHOLD = 0.6

        cur_threshold = float(threshold)
        last_centers, last_boxes = [], np.empty((0, 4), dtype=int)

        for attempt in range(MAX_RETRIES):
            centers, boxes_nms = _match_once(cur_threshold)
            if centers:
                if return_boxes:
                    return centers, boxes_nms
                return centers

            # 记录最后一次（若最终失败按规范返回空）
            last_centers, last_boxes = centers, boxes_nms

            # 为下一次尝试准备（这里默认不衰减阈值；如需可打开 THRESHOLD_DECAY）
            if attempt < MAX_RETRIES - 1 and THRESHOLD_DECAY > 0.0:
                cur_threshold = max(MIN_THRESHOLD, cur_threshold - THRESHOLD_DECAY)

        # 全部尝试失败
        if return_boxes:
            return last_centers, last_boxes
        return last_centers