【去红章+倾斜校正】在财报解析中的应用

原创已于 2025-11-12 20:49:35 修改 · 393 阅读

2 ·

本内容遵循CC 4.0 BY-SA版权协议

GEO检测

标签

#opencv #计算机视觉 #人工智能 #python

于 2025-11-12 18:12:29 首次发布

多模态大模型在金融领域的应用专栏收录该内容

3 篇文章

订阅专栏

引言
在金融财报解析中，图像预处理是不可或缺的关键步骤，尤其是在处理扫描或拍摄得到的财务报告时。原始图像常带有干扰因素，如红色印章（盖章或水印）和页面倾斜（由扫描或拍摄角度不正引起），这些因素会严重降低光学字符识别（OCR）的准确性和后续数据分析的可靠性。通过“去红章”操作，可以有效去除印章区域，避免OCR引擎将其误识别为文本内容，从而降低错误率；而“倾斜校正”则能调整图像角度，使文本行保持水平，提升字符识别的精度。这些预处理技术不仅提高了数据提取的效率，还保证了金融分析结果的准确性和可信度，为风险评估、投资决策等应用提供了坚实基础。
展示代码效果
去红章代码实现

def remove_red_seal(input_path_or_image, output_path=None, rows=16, cols=12):
    """Remove red seals (stamps) from a document image, tile by tile.

    The image is divided into a ``rows`` x ``cols`` grid. Each tile is checked
    for red pixels in HSV space; only tiles that contain enough red are
    modified, so the rest of the page keeps its original pixels. In a tile
    that is modified, every pixel whose red channel is above an Otsu-derived
    threshold is painted white. Two threshold strengths are blended 50/50 so
    that faint text on blurry scans is lightened rather than erased.

    Args:
        input_path_or_image: Image path (``str`` or ``os.PathLike``) or an
            image as a NumPy array in OpenCV channel order (BGR or BGRA).
            The input array is never modified. Single-channel (grayscale)
            arrays carry no colour information and are returned as an
            unchanged copy.
        output_path: If not ``None``, the result is also written to this
            path with ``cv2.imwrite``.
        rows: Number of grid rows. Clamped to the image height so that no
            tile is empty.
        cols: Number of grid columns. Clamped to the image width.

    Returns:
        The processed image as a new NumPy array with the same shape as the
        input image.

    Raises:
        FileNotFoundError: The input path could not be read as an image.
        TypeError: The input is neither a path nor a NumPy array.
        ValueError: ``rows``/``cols`` is smaller than 1, the image is empty,
            or the array shape is not a supported image layout.
        OSError: ``cv2.imwrite`` reported that the result was not written.
    """
    # Function-scope import: keeps this block independent of the file header.
    import os

    def remove_red_stamp(img, coefficient=1):
        """Whiten every pixel whose red channel exceeds the scaled Otsu level."""
        # OpenCV stores channels as B, G, R, so index 2 is the red channel.
        red_c = np.ascontiguousarray(img[:, :, 2])
        # Otsu picks the level separating dark ink from everything brighter
        # in the red channel (paper and red seal ink are both bright there).
        otsu_level, _ = cv2.threshold(red_c, 0, 255, cv2.THRESH_OTSU)
        # A coefficient below 1 lowers the cut-off, so more pixels are whitened.
        cutoff = int(otsu_level * coefficient)
        _, bright_mask = cv2.threshold(red_c, cutoff, 255, cv2.THRESH_BINARY)
        result = img.copy()
        # Only the colour channels are whitened; an alpha channel is kept.
        result[bright_mask == 255, :3] = 255
        return result

    def has_red_stamp(img, min_red_ratio=0.005):
        """Return True when more than ``min_red_ratio`` of the tile is red."""
        hsv = cv2.cvtColor(np.ascontiguousarray(img[:, :, :3]), cv2.COLOR_BGR2HSV)
        # Red wraps around the ends of OpenCV's 0-180 hue axis, so two ranges
        # are needed; the low S/V floor (40) also admits pale or dark reds.
        low_band = cv2.inRange(hsv, np.array([0, 40, 40]), np.array([15, 255, 255]))
        high_band = cv2.inRange(hsv, np.array([160, 40, 40]), np.array([180, 255, 255]))
        red_mask = cv2.bitwise_or(low_band, high_band)
        red_ratio = np.count_nonzero(red_mask) / (img.shape[0] * img.shape[1])
        return red_ratio > min_red_ratio

    if rows < 1 or cols < 1:
        raise ValueError(f"rows 和 cols 必须为正整数: rows={rows}, cols={cols}")

    # Load the image, or take the caller's array as a read-only source.
    if isinstance(input_path_or_image, (str, os.PathLike)):
        source = cv2.imread(os.fspath(input_path_or_image))
        if source is None:
            raise FileNotFoundError(f"无法读取图片: {input_path_or_image}")
    elif isinstance(input_path_or_image, np.ndarray):
        source = input_path_or_image
    else:
        raise TypeError("输入必须是图片路径或numpy数组")

    if source.size == 0:
        raise ValueError("输入图像为空")

    is_gray = source.ndim == 2 or (source.ndim == 3 and source.shape[2] == 1)
    is_colour = source.ndim == 3 and source.shape[2] in (3, 4)
    if not (is_gray or is_colour):
        raise ValueError(f"不支持的图像形状: {source.shape}")

    # All edits go into a copy; tiles are read from the untouched source.
    result = source.copy()

    if is_colour:
        h, w = source.shape[:2]
        # A grid finer than the image would produce empty tiles.
        grid_rows = min(rows, h)
        grid_cols = min(cols, w)
        tile_h = h // grid_rows
        tile_w = w // grid_cols

        for i in range(grid_rows):
            y1 = i * tile_h
            # The last row/column absorbs the remainder pixels.
            y2 = (i + 1) * tile_h if i < grid_rows - 1 else h
            for j in range(grid_cols):
                x1 = j * tile_w
                x2 = (j + 1) * tile_w if j < grid_cols - 1 else w
                tile = np.ascontiguousarray(source[y1:y2, x1:x2])
                if not has_red_stamp(tile):
                    continue
                # Blend a strict and a more aggressive removal pass: fully
                # aggressive removal wipes out text on blurry scans.
                strict = remove_red_stamp(tile, coefficient=1)
                aggressive = remove_red_stamp(tile, coefficient=0.8)
                result[y1:y2, x1:x2] = cv2.addWeighted(strict, 0.5, aggressive, 0.5, 0)

    if output_path is not None:
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(os.fspath(output_path), result):
            raise OSError(f"无法保存图片: {output_path}")

    return result

倾斜校正代码实现

def correct_skew(image_path_or_image, save_path=None, delta=0.05, limit=10):
    """Deskew a document/table image by searching for the best rotation angle.

    The image is binarised (inverted Otsu, so ink becomes 255) and rotated by
    each candidate angle. For every candidate the row sums of the rotated
    binary image are taken and the sum of squared differences between
    neighbouring rows is used as the score. The angle with the highest score
    is then applied to the original image.

    Candidates are scored one at a time, so peak memory stays at a couple of
    images regardless of how many angles are tried.

    Args:
        image_path_or_image: Image path (``str`` or ``os.PathLike``) or an
            image as a NumPy array (grayscale, or colour in OpenCV BGR
            order). The input array is never modified.
        save_path: If not ``None``, the result is also written to this path
            with ``cv2.imwrite``.
        delta: Angle step in degrees between candidates; must be positive.
        limit: Search range in degrees. Candidates are ``-limit``,
            ``-limit + delta``, ... and never exceed ``+limit``. Must be
            non-negative.

    Returns:
        The rotated image as a new NumPy array with the same size and
        channel layout as the input image.

    Raises:
        FileNotFoundError: The input path could not be read as an image.
        TypeError: The input is neither a path nor a NumPy array.
        ValueError: ``delta`` is not positive or ``limit`` is negative.
        OSError: ``cv2.imwrite`` reported that the result was not written.
    """
    # Function-scope import: keeps this block independent of the file header.
    import os

    def rotate_image(image, angle):
        """Rotate ``image`` about its centre by ``angle`` degrees, same size."""
        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
        # Replicating the border avoids introducing black corners.
        return cv2.warpAffine(image, matrix, (w, h), flags=cv2.INTER_CUBIC,
                              borderMode=cv2.BORDER_REPLICATE)

    def determine_score(binary):
        """Score one binary image by the contrast of its row sums."""
        row_sums = np.sum(binary, axis=1, dtype=float)
        # Squared differences between neighbouring rows: the score is high
        # when the row sums change abruptly from one row to the next.
        return float(np.sum(np.diff(row_sums) ** 2))

    if delta <= 0:
        raise ValueError(f"delta 必须为正数: {delta}")
    if limit < 0:
        raise ValueError(f"limit 不能为负数: {limit}")

    # Load the image, or take the caller's array as a read-only source.
    if isinstance(image_path_or_image, (str, os.PathLike)):
        original_image = cv2.imread(os.fspath(image_path_or_image))
        if original_image is None:
            raise FileNotFoundError(f"无法读取图片: {image_path_or_image}")
    elif isinstance(image_path_or_image, np.ndarray):
        original_image = image_path_or_image
    else:
        raise TypeError("输入必须是图片路径或numpy数组")

    # Build the candidate grid from an integer step count. np.arange with a
    # float step can emit one extra element beyond the requested stop value.
    n_steps = int(np.floor(2 * limit / delta + 1e-9)) + 1
    angles = -limit + delta * np.arange(n_steps)

    if n_steps == 1:
        # A single candidate needs no scoring.
        best_angle = angles[0]
    else:
        if original_image.ndim == 3:
            gray = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)
        else:
            gray = original_image
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
        # Only the scalar scores are kept; rotated images are discarded.
        scores = [determine_score(rotate_image(thresh, angle)) for angle in angles]
        best_angle = angles[int(np.argmax(scores))]

    if best_angle == 0:
        # A zero-degree rotation leaves every pixel in place.
        corrected = original_image.copy()
    else:
        # Rotate the original so colour images keep their channels.
        corrected = rotate_image(original_image, best_angle)

    if save_path is not None:
        # cv2.imwrite signals failure through its return value.
        if not cv2.imwrite(os.fspath(save_path), corrected):
            raise OSError(f"无法保存图片: {save_path}")

    return corrected